src/be/nikiroo/fanfix/supported/BasicSupport.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.io.IOException;
   4 import java.io.InputStream;
   5 import java.net.URL;
   6 import java.util.ArrayList;
   7 import java.util.Date;
   8 import java.util.HashMap;
   9 import java.util.List;
  10 import java.util.Map;
  11 import java.util.Map.Entry;
  12
  13 import org.jsoup.helper.DataUtil;
  14 import org.jsoup.nodes.Document;
  15 import org.jsoup.nodes.Element;
  16 import org.jsoup.nodes.Node;
  17
  18 import be.nikiroo.fanfix.Instance;
  19 import be.nikiroo.fanfix.bundles.StringId;
  20 import be.nikiroo.fanfix.data.Chapter;
  21 import be.nikiroo.fanfix.data.MetaData;
  22 import be.nikiroo.fanfix.data.Story;
  23 import be.nikiroo.utils.Progress;
  24 import be.nikiroo.utils.StringUtils;
  25
  26 /**
  27  * This class is the base class used by the other support classes. It can be
  28  * used outside of this package, and have static method that you can use to get
  29  * access to the correct support class.
  30  * <p>
  31  * It will be used with 'resources' (usually web pages or files).
  32  *
  33  * @author niki
  34  */
  35 public abstract class BasicSupport {
  36         private Document sourceNode;
  37         private URL source;
  38         private SupportType type;
  39         private URL currentReferer; // with only one 'r', as in 'HTTP'...
  40
  41         /**
  42          * Check if the given resource is supported by this {@link BasicSupport}.
  43          *
  44          * @param url
  45          *            the resource to check for
  46          *
  47          * @return TRUE if it is
  48          */
  49         protected abstract boolean supports(URL url);
  50
  51         /**
  52          * Return TRUE if the support will return HTML encoded content values for
  53          * the chapters content.
  54          *
  55          * @return TRUE for HTML
  56          */
  57         protected abstract boolean isHtml();
  58
  59         /**
  60          * Return the {@link MetaData} of this story.
  61          *
  62          * @return the associated {@link MetaData}, never NULL
  63          *
  64          * @throws IOException
  65          *             in case of I/O error
  66          */
  67         protected abstract MetaData getMeta() throws IOException;
  68
  69         /**
  70          * Return the story description.
  71          *
  72          * @return the description
  73          *
  74          * @throws IOException
  75          *             in case of I/O error
  76          */
  77         protected abstract String getDesc() throws IOException;
  78
  79         /**
  80          * Return the list of chapters (name and resource).
  81          * <p>
  82          * Can be NULL if this {@link BasicSupport} do no use chapters.
  83          *
  84          * @param pg
  85          *            the optional progress reporter
  86          *
  87          * @return the chapters or NULL
  88          *
  89          * @throws IOException
  90          *             in case of I/O error
  91          */
  92         protected abstract List<Entry<String, URL>> getChapters(Progress pg)
  93                         throws IOException;
  94
  95         /**
  96          * Return the content of the chapter (possibly HTML encoded, if
  97          * {@link BasicSupport#isHtml()} is TRUE).
  98          *
  99          * @param chapUrl
 100          *            the chapter {@link URL}
 101          * @param number
 102          *            the chapter number
 103          * @param pg
 104          *            the optional progress reporter
 105          *
 106          * @return the content
 107          *
 108          * @throws IOException
 109          *             in case of I/O error
 110          */
 111         protected abstract String getChapterContent(URL chapUrl, int number,
 112                         Progress pg) throws IOException;
 113
 114         /**
 115          * Return the list of cookies (values included) that must be used to
 116          * correctly fetch the resources.
 117          * <p>
 118          * You are expected to call the super method implementation if you override
 119          * it.
 120          *
 121          * @return the cookies
 122          */
 123         public Map<String, String> getCookies() {
 124                 return new HashMap<String, String>();
 125         }
 126
 127         /**
 128          * OAuth authorisation (aka, "bearer XXXXXXX").
 129          *
 130          * @return the OAuth string
 131          */
 132         public String getOAuth() {
 133                 return null;
 134         }
 135
 136         /**
 137          * Return the canonical form of the main {@link URL}.
 138          *
 139          * @param source
 140          *            the source {@link URL}, which can be NULL
 141          *
 142          * @return the canonical form of this {@link URL} or NULL if the source was
 143          *         NULL
 144          */
 145         protected URL getCanonicalUrl(URL source) {
 146                 return source;
 147         }
 148
 149         /**
 150          * The main {@link Node} for this {@link Story}.
 151          *
 152          * @return the node
 153          */
 154         protected Element getSourceNode() {
 155                 return sourceNode;
 156         }
 157
 158         /**
 159          * The main {@link URL} for this {@link Story}.
 160          *
 161          * @return the URL
 162          */
 163         protected URL getSource() {
 164                 return source;
 165         }
 166
 167         /**
 168          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 169          * the current {@link URL} we work on.
 170          *
 171          * @return the referer
 172          */
 173         public URL getCurrentReferer() {
 174                 return currentReferer;
 175         }
 176
 177         /**
 178          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 179          * the current {@link URL} we work on.
 180          *
 181          * @param currentReferer
 182          *            the new referer
 183          */
 184         protected void setCurrentReferer(URL currentReferer) {
 185                 this.currentReferer = currentReferer;
 186         }
 187
 188         /**
 189          * The support type.
 190          *
 191          * @return the type
 192          */
 193         public SupportType getType() {
 194                 return type;
 195         }
 196
 197         /**
 198          * The support type.
 199          *
 200          * @param type
 201          *            the new type
 202          */
 203         protected void setType(SupportType type) {
 204                 this.type = type;
 205         }
 206
 207         /**
 208          * Open an input link that will be used for the support.
 209          * <p>
 210          * Can return NULL, in which case you are supposed to work without a source
 211          * node.
 212          *
 213          * @param source
 214          *            the source {@link URL}
 215          *
 216          * @return the {@link InputStream}
 217          *
 218          * @throws IOException
 219          *             in case of I/O error
 220          */
 221         protected Document loadDocument(URL source) throws IOException {
 222                 String url = getCanonicalUrl(source).toString();
 223                 return DataUtil.load(Instance.getCache().open(source, this, false),
 224                                 "UTF-8", url.toString());
 225         }
 226
 227         /**
 228          * Log into the support (can be a no-op depending upon the support).
 229          *
 230          * @throws IOException
 231          *             in case of I/O error
 232          */
 233         protected void login() throws IOException {
 234         }
 235
 236         /**
 237          * Now that we have processed the {@link Story}, close the resources if any.
 238          */
 239         protected void close() {
 240                 setCurrentReferer(null);
 241         }
 242
 243         /**
 244          * Process the given story resource into a partially filled {@link Story}
 245          * object containing the name and metadata.
 246          *
 247          * @param getDesc
 248          *            retrieve the description of the story, or not
 249          * @param pg
 250          *            the optional progress reporter
 251          *
 252          * @return the {@link Story}, never NULL
 253          *
 254          * @throws IOException
 255          *             in case of I/O error
 256          */
 257         protected Story processMeta(boolean getDesc, Progress pg)
 258                         throws IOException {
 259                 if (pg == null) {
 260                         pg = new Progress();
 261                 } else {
 262                         pg.setMinMax(0, 100);
 263                 }
 264
 265                 pg.setProgress(30);
 266
 267                 Story story = new Story();
 268                 MetaData meta = getMeta();
 269                 if (meta.getCreationDate() == null || meta.getCreationDate().isEmpty()) {
 270                         meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
 271                 }
 272                 story.setMeta(meta);
 273
 274                 pg.setProgress(50);
 275
 276                 if (meta.getCover() == null) {
 277                         meta.setCover(BasicSupportHelper.getDefaultCover(meta.getSubject()));
 278                 }
 279
 280                 pg.setProgress(60);
 281
 282                 if (getDesc) {
 283                         String descChapterName = Instance.getTrans().getString(
 284                                         StringId.DESCRIPTION);
 285                         story.getMeta().setResume(
 286                                         BasicSupportPara.makeChapter(this, source, 0,
 287                                                         descChapterName, //
 288                                                         getDesc(), isHtml(), null));
 289                 }
 290
 291                 pg.done();
 292                 return story;
 293         }
 294
 295         /**
 296          * Process the given story resource into a fully filled {@link Story}
 297          * object.
 298          *
 299          * @param pg
 300          *            the optional progress reporter
 301          *
 302          * @return the {@link Story}, never NULL
 303          *
 304          * @throws IOException
 305          *             in case of I/O error
 306          */
 307         // ADD final when BasicSupport_Deprecated is gone
 308         public Story process(Progress pg) throws IOException {
 309                 setCurrentReferer(source);
 310                 login();
 311                 sourceNode = loadDocument(source);
 312
 313                 try {
 314                         return doProcess(pg);
 315                 } finally {
 316                         close();
 317                 }
 318         }
 319
 320         /**
 321          * Actual processing step, without the calls to other methods.
 322          * <p>
 323          * Will convert the story resource into a fully filled {@link Story} object.
 324          *
 325          * @param pg
 326          *            the optional progress reporter
 327          *
 328          * @return the {@link Story}, never NULL
 329          *
 330          * @throws IOException
 331          *             in case of I/O error
 332          */
 333         protected Story doProcess(Progress pg) throws IOException {
 334                 if (pg == null) {
 335                         pg = new Progress();
 336                 } else {
 337                         pg.setMinMax(0, 100);
 338                 }
 339
 340                 pg.setProgress(1);
 341                 Progress pgMeta = new Progress();
 342                 pg.addProgress(pgMeta, 10);
 343                 Story story = processMeta(true, pgMeta);
 344                 pgMeta.done(); // 10%
 345
 346                 pg.setName("Retrieving " + story.getMeta().getTitle());
 347
 348                 Progress pgGetChapters = new Progress();
 349                 pg.addProgress(pgGetChapters, 10);
 350                 story.setChapters(new ArrayList<Chapter>());
 351                 List<Entry<String, URL>> chapters = getChapters(pgGetChapters);
 352                 pgGetChapters.done(); // 20%
 353
 354                 if (chapters != null) {
 355                         Progress pgChaps = new Progress("Extracting chapters", 0,
 356                                         chapters.size() * 300);
 357                         pg.addProgress(pgChaps, 80);
 358
 359                         long words = 0;
 360                         int i = 1;
 361                         for (Entry<String, URL> chap : chapters) {
 362                                 pgChaps.setName("Extracting chapter " + i);
 363                                 URL chapUrl = chap.getValue();
 364                                 String chapName = chap.getKey();
 365                                 if (chapUrl != null) {
 366                                         setCurrentReferer(chapUrl);
 367                                 }
 368
 369                                 pgChaps.setProgress(i * 100);
 370                                 Progress pgGetChapterContent = new Progress();
 371                                 Progress pgMakeChapter = new Progress();
 372                                 pgChaps.addProgress(pgGetChapterContent, 100);
 373                                 pgChaps.addProgress(pgMakeChapter, 100);
 374
 375                                 String content = getChapterContent(chapUrl, i,
 376                                                 pgGetChapterContent);
 377                                 pgGetChapterContent.done();
 378                                 Chapter cc = BasicSupportPara.makeChapter(this, chapUrl, i,
 379                                                 chapName, content, isHtml(), pgMakeChapter);
 380                                 pgMakeChapter.done();
 381
 382                                 words += cc.getWords();
 383                                 story.getChapters().add(cc);
 384                                 story.getMeta().setWords(words);
 385
 386                                 i++;
 387                         }
 388
 389                         pgChaps.setName("Extracting chapters");
 390                         pgChaps.done();
 391                 }
 392
 393                 pg.done();
 394
 395                 return story;
 396         }
 397
 398         /**
 399          * Return a {@link BasicSupport} implementation supporting the given
 400          * resource if possible.
 401          *
 402          * @param url
 403          *            the story resource
 404          *
 405          * @return an implementation that supports it, or NULL
 406          */
 407         public static BasicSupport getSupport(URL url) {
 408                 if (url == null) {
 409                         return null;
 410                 }
 411
 412                 // TEXT and INFO_TEXT always support files (not URLs though)
 413                 for (SupportType type : SupportType.values()) {
 414                         if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
 415                                 BasicSupport support = getSupport(type, url);
 416                                 if (support != null && support.supports(url)) {
 417                                         return support;
 418                                 }
 419                         }
 420                 }
 421
 422                 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
 423                                 SupportType.TEXT }) {
 424                         BasicSupport support = getSupport(type, url);
 425                         if (support != null && support.supports(url)) {
 426                                 return support;
 427                         }
 428                 }
 429
 430                 return null;
 431         }
 432
 433         /**
 434          * Return a {@link BasicSupport} implementation supporting the given type.
 435          *
 436          * @param type
 437          *            the type
 438          * @param url
 439          *            the {@link URL} to support (can be NULL to get an
 440          *            "abstract support"; if not NULL, will be used as the source
 441          *            URL)
 442          *
 443          * @return an implementation that supports it, or NULL
 444          */
 445         public static BasicSupport getSupport(SupportType type, URL url) {
 446                 BasicSupport support = null;
 447
 448                 switch (type) {
 449                 case EPUB:
 450                         support = new Epub();
 451                         break;
 452                 case INFO_TEXT:
 453                         support = new InfoText();
 454                         break;
 455                 case FIMFICTION:
 456                         try {
 457                                 // Can fail if no client key or NO in options
 458                                 support = new FimfictionApi();
 459                         } catch (IOException e) {
 460                                 support = new Fimfiction();
 461                         }
 462                         break;
 463                 case FANFICTION:
 464                         support = new Fanfiction();
 465                         break;
 466                 case TEXT:
 467                         support = new Text();
 468                         break;
 469                 case MANGAFOX:
 470                         support = new MangaFox();
 471                         break;
 472                 case E621:
 473                         support = new E621();
 474                         break;
 475                 case YIFFSTAR:
 476                         support = new YiffStar();
 477                         break;
 478                 case E_HENTAI:
 479                         support = new EHentai();
 480                         break;
 481                 case MANGA_LEL:
 482                         support = new MangaLel();
 483                         break;
 484                 case CBZ:
 485                         support = new Cbz();
 486                         break;
 487                 case HTML:
 488                         support = new Html();
 489                         break;
 490                 }
 491
 492                 if (support != null) {
 493                         support.setType(type);
 494                         support.source = support.getCanonicalUrl(url);
 495                 }
 496
 497                 return support;
 498         }
 499 }