src/be/nikiroo/fanfix/supported/BasicSupport.java

   1 package be.nikiroo.fanfix.supported;
   2
import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Scanner;
  19
  20 import be.nikiroo.fanfix.Instance;
  21 import be.nikiroo.fanfix.bundles.Config;
  22 import be.nikiroo.fanfix.bundles.StringId;
  23 import be.nikiroo.fanfix.data.Chapter;
  24 import be.nikiroo.fanfix.data.MetaData;
  25 import be.nikiroo.fanfix.data.Paragraph;
  26 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
  27 import be.nikiroo.fanfix.data.Story;
  28 import be.nikiroo.utils.IOUtils;
  29 import be.nikiroo.utils.Progress;
  30 import be.nikiroo.utils.StringUtils;
  31
  32 /**
  33  * This class is the base class used by the other support classes. It can be
  34  * used outside of this package, and has static methods that you can use to
  35  * get access to the correct support class.
  36  * <p>
  37  * It will be used with 'resources' (usually web pages or files).
  38  *
  39  * @author niki
  40  */
  41 public abstract class BasicSupport {
  42         /**
  43          * The supported input types for which we can get a {@link BasicSupport}
  44          * object.
  45          *
  46          * @author niki
  47          */
  48         public enum SupportType {
  49                 /** EPUB files created with this program */
  50                 EPUB,
  51                 /** Pure text file with some rules */
  52                 TEXT,
  53                 /** TEXT but with associated .info file */
  54                 INFO_TEXT,
  55                 /** My Little Pony fanfictions */
  56                 FIMFICTION,
  57                 /** Fanfictions from a lot of different universes */
  58                 FANFICTION,
  59                 /** Website with lots of Mangas */
  60                 MANGAFOX,
  61                 /** Furry website with comics support */
  62                 E621,
  63                 /** Furry website with stories */
  64                 YIFFSTAR,
  65                 /** Comics and images groups, mostly but not only NSFW */
  66                 E_HENTAI,
  67                 /** CBZ files */
  68                 CBZ,
  69                 /** HTML files */
  70                 HTML;
  71
  72                 /**
  73                  * A description of this support type (more information than the
  74                  * {@link BasicSupport#getSourceName()}).
  75                  *
  76                  * @return the description
  77                  */
  78                 public String getDesc() {
  79                         String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
  80                                         this.name());
  81
  82                         if (desc == null) {
  83                                 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
  84                         }
  85
  86                         return desc;
  87                 }
  88
  89                 /**
  90                  * The name of this support type (a short version).
  91                  *
  92                  * @return the name
  93                  */
  94                 public String getSourceName() {
  95                         BasicSupport support = BasicSupport.getSupport(this);
  96                         if (support != null) {
  97                                 return support.getSourceName();
  98                         }
  99
 100                         return null;
 101                 }
 102
 103                 @Override
 104                 public String toString() {
 105                         return super.toString().toLowerCase();
 106                 }
 107
 108                 /**
 109                  * Call {@link SupportType#valueOf(String.toUpperCase())}.
 110                  *
 111                  * @param typeName
 112                  *            the possible type name
 113                  *
 114                  * @return NULL or the type
 115                  */
 116                 public static SupportType valueOfUC(String typeName) {
 117                         return SupportType.valueOf(typeName == null ? null : typeName
 118                                         .toUpperCase());
 119                 }
 120
 121                 /**
 122                  * Call {@link SupportType#valueOf(String.toUpperCase())} but return
 123                  * NULL for NULL instead of raising exception.
 124                  *
 125                  * @param typeName
 126                  *            the possible type name
 127                  *
 128                  * @return NULL or the type
 129                  */
 130                 public static SupportType valueOfNullOkUC(String typeName) {
 131                         if (typeName == null) {
 132                                 return null;
 133                         }
 134
 135                         return SupportType.valueOfUC(typeName);
 136                 }
 137
 138                 /**
 139                  * Call {@link SupportType#valueOf(String.toUpperCase())} but return
 140                  * NULL in case of error instead of raising an exception.
 141                  *
 142                  * @param typeName
 143                  *            the possible type name
 144                  *
 145                  * @return NULL or the type
 146                  */
 147                 public static SupportType valueOfAllOkUC(String typeName) {
 148                         try {
 149                                 return SupportType.valueOfUC(typeName);
 150                         } catch (Exception e) {
 151                                 return null;
 152                         }
 153                 }
 154         }
 155
 156         private InputStream in;
 157         private SupportType type;
 158         private URL currentReferer; // with only one 'r', as in 'HTTP'...
 159
 160         // quote chars
 161         private char openQuote = Instance.getTrans().getCharacter(
 162                         StringId.OPEN_SINGLE_QUOTE);
 163         private char closeQuote = Instance.getTrans().getCharacter(
 164                         StringId.CLOSE_SINGLE_QUOTE);
 165         private char openDoubleQuote = Instance.getTrans().getCharacter(
 166                         StringId.OPEN_DOUBLE_QUOTE);
 167         private char closeDoubleQuote = Instance.getTrans().getCharacter(
 168                         StringId.CLOSE_DOUBLE_QUOTE);
 169
 170         /**
 171          * The name of this support class.
 172          *
 173          * @return the name
 174          */
 175         protected abstract String getSourceName();
 176
 177         /**
 178          * Check if the given resource is supported by this {@link BasicSupport}.
 179          *
 180          * @param url
 181          *            the resource to check for
 182          *
 183          * @return TRUE if it is
 184          */
 185         protected abstract boolean supports(URL url);
 186
 187         /**
 188          * Return TRUE if the support will return HTML encoded content values for
 189          * the chapters content.
 190          *
 191          * @return TRUE for HTML
 192          */
 193         protected abstract boolean isHtml();
 194
 195         protected abstract MetaData getMeta(URL source, InputStream in)
 196                         throws IOException;
 197
 198         /**
 199          * Return the story description.
 200          *
 201          * @param source
 202          *            the source of the story
 203          * @param in
 204          *            the input (the main resource)
 205          *
 206          * @return the description
 207          *
 208          * @throws IOException
 209          *             in case of I/O error
 210          */
 211         protected abstract String getDesc(URL source, InputStream in)
 212                         throws IOException;
 213
 214         /**
 215          * Return the list of chapters (name and resource).
 216          *
 217          * @param source
 218          *            the source of the story
 219          * @param in
 220          *            the input (the main resource)
 221          * @param pg
 222          *            the optional progress reporter
 223          *
 224          * @return the chapters
 225          *
 226          * @throws IOException
 227          *             in case of I/O error
 228          */
 229         protected abstract List<Entry<String, URL>> getChapters(URL source,
 230                         InputStream in, Progress pg) throws IOException;
 231
 232         /**
 233          * Return the content of the chapter (possibly HTML encoded, if
 234          * {@link BasicSupport#isHtml()} is TRUE).
 235          *
 236          * @param source
 237          *            the source of the story
 238          * @param in
 239          *            the input (the main resource)
 240          * @param number
 241          *            the chapter number
 242          * @param pg
 243          *            the optional progress reporter
 244          *
 245          * @return the content
 246          *
 247          * @throws IOException
 248          *             in case of I/O error
 249          */
 250         protected abstract String getChapterContent(URL source, InputStream in,
 251                         int number, Progress pg) throws IOException;
 252
 253         /**
 254          * Log into the support (can be a no-op depending upon the support).
 255          *
 256          * @throws IOException
 257          *             in case of I/O error
 258          */
 259         public void login() throws IOException {
 260
 261         }
 262
 263         /**
 264          * Return the list of cookies (values included) that must be used to
 265          * correctly fetch the resources.
 266          * <p>
 267          * You are expected to call the super method implementation if you override
 268          * it.
 269          *
 270          * @return the cookies
 271          *
 272          * @throws IOException
 273          *             in case of I/O error
 274          */
 275         public Map<String, String> getCookies() throws IOException {
 276                 return new HashMap<String, String>();
 277         }
 278
 279         /**
 280          * Return the canonical form of the main {@link URL}.
 281          *
 282          * @param source
 283          *            the source {@link URL}
 284          *
 285          * @return the canonical form of this {@link URL}
 286          *
 287          * @throws IOException
 288          *             in case of I/O error
 289          */
 290         public URL getCanonicalUrl(URL source) throws IOException {
 291                 return source;
 292         }
 293
 294         /**
 295          * Process the given story resource into a partially filled {@link Story}
 296          * object containing the name and metadata, except for the description.
 297          *
 298          * @param url
 299          *            the story resource
 300          *
 301          * @return the {@link Story}
 302          *
 303          * @throws IOException
 304          *             in case of I/O error
 305          */
 306         public Story processMeta(URL url) throws IOException {
 307                 return processMeta(url, true, false, null);
 308         }
 309
 310         /**
 311          * Process the given story resource into a partially filled {@link Story}
 312          * object containing the name and metadata.
 313          *
 314          * @param url
 315          *            the story resource
 316          *
 317          * @param close
 318          *            close "this" and "in" when done
 319          * @param pg
 320          *            the optional progress reporter
 321          *
 322          * @return the {@link Story}
 323          *
 324          * @throws IOException
 325          *             in case of I/O error
 326          */
 327         protected Story processMeta(URL url, boolean close, boolean getDesc,
 328                         Progress pg) throws IOException {
 329                 if (pg == null) {
 330                         pg = new Progress();
 331                 } else {
 332                         pg.setMinMax(0, 100);
 333                 }
 334
 335                 login();
 336                 pg.setProgress(10);
 337
 338                 url = getCanonicalUrl(url);
 339
 340                 setCurrentReferer(url);
 341
 342                 in = openInput(url);
 343                 if (in == null) {
 344                         return null;
 345                 }
 346
 347                 try {
 348                         preprocess(url, getInput());
 349                         pg.setProgress(30);
 350
 351                         Story story = new Story();
 352                         MetaData meta = getMeta(url, getInput());
 353                         if (meta.getCreationDate() == null
 354                                         || meta.getCreationDate().isEmpty()) {
 355                                 meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
 356                         }
 357                         story.setMeta(meta);
 358
 359                         pg.setProgress(50);
 360
 361                         if (meta != null && meta.getCover() == null) {
 362                                 meta.setCover(getDefaultCover(meta.getSubject()));
 363                         }
 364
 365                         pg.setProgress(60);
 366
 367                         if (getDesc) {
 368                                 String descChapterName = Instance.getTrans().getString(
 369                                                 StringId.DESCRIPTION);
 370                                 story.getMeta().setResume(
 371                                                 makeChapter(url, 0, descChapterName,
 372                                                                 getDesc(url, getInput()), null));
 373                         }
 374
 375                         pg.setProgress(100);
 376                         return story;
 377                 } finally {
 378                         if (close) {
 379                                 try {
 380                                         close();
 381                                 } catch (IOException e) {
 382                                         Instance.syserr(e);
 383                                 }
 384
 385                                 if (in != null) {
 386                                         in.close();
 387                                 }
 388                         }
 389
 390                         setCurrentReferer(null);
 391                 }
 392         }
 393
 394         /**
 395          * Process the given story resource into a fully filled {@link Story}
 396          * object.
 397          *
 398          * @param url
 399          *            the story resource
 400          * @param pg
 401          *            the optional progress reporter
 402          *
 403          * @return the {@link Story}
 404          *
 405          * @throws IOException
 406          *             in case of I/O error
 407          */
 408         public Story process(URL url, Progress pg) throws IOException {
 409                 if (pg == null) {
 410                         pg = new Progress();
 411                 } else {
 412                         pg.setMinMax(0, 100);
 413                 }
 414
 415                 url = getCanonicalUrl(url);
 416                 pg.setProgress(1);
 417                 try {
 418                         Progress pgMeta = new Progress();
 419                         pg.addProgress(pgMeta, 10);
 420                         Story story = processMeta(url, false, true, pgMeta);
 421                         if (!pgMeta.isDone()) {
 422                                 pgMeta.setProgress(pgMeta.getMax()); // 10%
 423                         }
 424
 425                         if (story == null) {
 426                                 pg.setProgress(90);
 427                                 return null;
 428                         }
 429
 430                         pg.setName("Retrieving " + story.getMeta().getTitle());
 431
 432                         setCurrentReferer(url);
 433
 434                         Progress pgGetChapters = new Progress();
 435                         pg.addProgress(pgGetChapters, 10);
 436                         story.setChapters(new ArrayList<Chapter>());
 437                         List<Entry<String, URL>> chapters = getChapters(url, getInput(),
 438                                         pgGetChapters);
 439                         if (!pgGetChapters.isDone()) {
 440                                 pgGetChapters.setProgress(pgGetChapters.getMax()); // 20%
 441                         }
 442
 443                         if (chapters != null) {
 444                                 Progress pgChaps = new Progress("Extracting chapters", 0,
 445                                                 chapters.size() * 300);
 446                                 pg.addProgress(pgChaps, 80);
 447
 448                                 long words = 0;
 449                                 int i = 1;
 450                                 for (Entry<String, URL> chap : chapters) {
 451                                         pgChaps.setName("Extracting chapter " + i);
 452                                         setCurrentReferer(chap.getValue());
 453                                         InputStream chapIn = Instance.getCache().open(
 454                                                         chap.getValue(), this, true);
 455                                         pgChaps.setProgress(i * 100);
 456                                         try {
 457                                                 Progress pgGetChapterContent = new Progress();
 458                                                 Progress pgMakeChapter = new Progress();
 459                                                 pgChaps.addProgress(pgGetChapterContent, 100);
 460                                                 pgChaps.addProgress(pgMakeChapter, 100);
 461
 462                                                 String content = getChapterContent(url, chapIn, i,
 463                                                                 pgGetChapterContent);
 464                                                 if (!pgGetChapterContent.isDone()) {
 465                                                         pgGetChapterContent.setProgress(pgGetChapterContent
 466                                                                         .getMax());
 467                                                 }
 468
 469                                                 Chapter cc = makeChapter(url, i, chap.getKey(),
 470                                                                 content, pgMakeChapter);
 471                                                 if (!pgMakeChapter.isDone()) {
 472                                                         pgMakeChapter.setProgress(pgMakeChapter.getMax());
 473                                                 }
 474
 475                                                 words += cc.getWords();
 476                                                 story.getChapters().add(cc);
 477                                                 if (story.getMeta() != null) {
 478                                                         story.getMeta().setWords(words);
 479                                                 }
 480                                         } finally {
 481                                                 chapIn.close();
 482                                         }
 483
 484                                         i++;
 485                                 }
 486
 487                                 pgChaps.setName("Extracting chapters");
 488                         } else {
 489                                 pg.setProgress(80);
 490                         }
 491
 492                         return story;
 493
 494                 } finally {
 495                         try {
 496                                 close();
 497                         } catch (IOException e) {
 498                                 Instance.syserr(e);
 499                         }
 500
 501                         if (in != null) {
 502                                 in.close();
 503                         }
 504
 505                         setCurrentReferer(null);
 506                 }
 507         }
 508
 509         /**
 510          * The support type.
 511          *
 512          * @return the type
 513          */
 514         public SupportType getType() {
 515                 return type;
 516         }
 517
 518         /**
 519          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 520          * the current {@link URL} we work on.
 521          *
 522          * @return the referer
 523          */
 524         public URL getCurrentReferer() {
 525                 return currentReferer;
 526         }
 527
 528         /**
 529          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 530          * the current {@link URL} we work on.
 531          *
 532          * @param currentReferer
 533          *            the new referer
 534          */
 535         protected void setCurrentReferer(URL currentReferer) {
 536                 this.currentReferer = currentReferer;
 537         }
 538
 539         /**
 540          * The support type.
 541          *
 542          * @param type
 543          *            the new type
 544          *
 545          * @return this
 546          */
 547         protected BasicSupport setType(SupportType type) {
 548                 this.type = type;
 549                 return this;
 550         }
 551
 552         /**
 553          * Prepare the support if needed before processing.
 554          *
 555          * @param source
 556          *            the source of the story
 557          * @param in
 558          *            the input (the main resource)
 559          *
 560          * @throws IOException
 561          *             on I/O error
 562          */
 563         protected void preprocess(URL source, InputStream in) throws IOException {
 564         }
 565
 566         /**
 567          * Now that we have processed the {@link Story}, close the resources if any.
 568          *
 569          * @throws IOException
 570          *             on I/O error
 571          */
 572         protected void close() throws IOException {
 573         }
 574
 575         /**
 576          * Create a {@link Chapter} object from the given information, formatting
 577          * the content as it should be.
 578          *
 579          * @param number
 580          *            the chapter number
 581          * @param name
 582          *            the chapter name
 583          * @param content
 584          *            the chapter content
 585          * @param pg
 586          *            the optional progress reporter
 587          *
 588          * @return the {@link Chapter}
 589          *
 590          * @throws IOException
 591          *             in case of I/O error
 592          */
 593         protected Chapter makeChapter(URL source, int number, String name,
 594                         String content, Progress pg) throws IOException {
 595                 // Chapter name: process it correctly, then remove the possible
 596                 // redundant "Chapter x: " in front of it, or "-" (as in
 597                 // "Chapter 5: - Fun!" after the ": " was automatically added)
 598                 String chapterName = processPara(name).getContent().trim();
 599                 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
 600                                 .split(",")) {
 601                         String chapterWord = Instance.getConfig().getStringX(
 602                                         Config.CHAPTER, lang);
 603                         if (chapterName.startsWith(chapterWord)) {
 604                                 chapterName = chapterName.substring(chapterWord.length())
 605                                                 .trim();
 606                                 break;
 607                         }
 608                 }
 609
 610                 if (chapterName.startsWith(Integer.toString(number))) {
 611                         chapterName = chapterName.substring(
 612                                         Integer.toString(number).length()).trim();
 613                 }
 614
 615                 while (chapterName.startsWith(":") || chapterName.startsWith("-")) {
 616                         chapterName = chapterName.substring(1).trim();
 617                 }
 618                 //
 619
 620                 Chapter chap = new Chapter(number, chapterName);
 621
 622                 if (content != null) {
 623                         List<Paragraph> paras = makeParagraphs(source, content, pg);
 624                         long words = 0;
 625                         for (Paragraph para : paras) {
 626                                 words += para.getWords();
 627                         }
 628                         chap.setParagraphs(paras);
 629                         chap.setWords(words);
 630                 }
 631
 632                 return chap;
 633
 634         }
 635
 636         /**
 637          * Convert the given content into {@link Paragraph}s.
 638          *
 639          * @param source
 640          *            the source URL of the story
 641          * @param content
 642          *            the textual content
 643          * @param pg
 644          *            the optional progress reporter
 645          *
 646          * @return the {@link Paragraph}s
 647          *
 648          * @throws IOException
 649          *             in case of I/O error
 650          */
 651         protected List<Paragraph> makeParagraphs(URL source, String content,
 652                         Progress pg) throws IOException {
 653                 if (pg == null) {
 654                         pg = new Progress();
 655                 }
 656
 657                 if (isHtml()) {
 658                         // Special <HR> processing:
 659                         content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
 660                                         "<br/>* * *<br/>");
 661                 }
 662
 663                 List<Paragraph> paras = new ArrayList<Paragraph>();
 664
 665                 if (content != null && !content.trim().isEmpty()) {
 666                         if (isHtml()) {
 667                                 String[] tab = content.split("(<p>|</p>|<br>|<br/>)");
 668                                 pg.setMinMax(0, tab.length);
 669                                 int i = 1;
 670                                 for (String line : tab) {
 671                                         if (line.startsWith("[") && line.endsWith("]")) {
 672                                                 pg.setName("Extracting image " + i);
 673                                         }
 674                                         paras.add(makeParagraph(source, line.trim()));
 675                                         pg.setProgress(i++);
 676                                 }
 677                                 pg.setName(null);
 678                         } else {
 679                                 List<String> lines = new ArrayList<String>();
 680                                 BufferedReader buff = null;
 681                                 try {
 682                                         buff = new BufferedReader(
 683                                                         new InputStreamReader(new ByteArrayInputStream(
 684                                                                         content.getBytes("UTF-8")), "UTF-8"));
 685                                         for (String line = buff.readLine(); line != null; line = buff
 686                                                         .readLine()) {
 687                                                 lines.add(line.trim());
 688                                         }
 689                                 } finally {
 690                                         if (buff != null) {
 691                                                 buff.close();
 692                                         }
 693                                 }
 694
 695                                 pg.setMinMax(0, lines.size());
 696                                 int i = 0;
 697                                 for (String line : lines) {
 698                                         if (line.startsWith("[") && line.endsWith("]")) {
 699                                                 pg.setName("Extracting image " + i);
 700                                         }
 701                                         paras.add(makeParagraph(source, line));
 702                                         pg.setProgress(i++);
 703                                 }
 704                                 pg.setName(null);
 705                         }
 706
 707                         // Check quotes for "bad" format
 708                         List<Paragraph> newParas = new ArrayList<Paragraph>();
 709                         for (Paragraph para : paras) {
 710                                 newParas.addAll(requotify(para));
 711                         }
 712                         paras = newParas;
 713
 714                         // Remove double blanks/brks
 715                         fixBlanksBreaks(paras);
 716                 }
 717
 718                 return paras;
 719         }
 720
 721         /**
 722          * Convert the given line into a single {@link Paragraph}.
 723          *
 724          * @param source
 725          *            the source URL of the story
 726          * @param line
 727          *            the textual content of the paragraph
 728          *
 729          * @return the {@link Paragraph}
 730          */
 731         private Paragraph makeParagraph(URL source, String line) {
 732                 URL image = null;
 733                 if (line.startsWith("[") && line.endsWith("]")) {
 734                         image = getImageUrl(this, source,
 735                                         line.substring(1, line.length() - 1).trim());
 736                 }
 737
 738                 if (image != null) {
 739                         return new Paragraph(image);
 740                 } else {
 741                         return processPara(line);
 742                 }
 743         }
 744
 745         /**
 746          * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
 747          * those {@link Paragraph}s.
 748          * <p>
 749          * The resulting list will not contain a starting or trailing blank/break
 750          * nor 2 blanks or breaks following each other.
 751          *
 752          * @param paras
 753          *            the list of {@link Paragraph}s to fix
 754          */
 755         protected void fixBlanksBreaks(List<Paragraph> paras) {
 756                 boolean space = false;
 757                 boolean brk = true;
 758                 for (int i = 0; i < paras.size(); i++) {
 759                         Paragraph para = paras.get(i);
 760                         boolean thisSpace = para.getType() == ParagraphType.BLANK;
 761                         boolean thisBrk = para.getType() == ParagraphType.BREAK;
 762
 763                         if (i > 0 && space && thisBrk) {
 764                                 paras.remove(i - 1);
 765                                 i--;
 766                         } else if ((space || brk) && (thisSpace || thisBrk)) {
 767                                 paras.remove(i);
 768                                 i--;
 769                         }
 770
 771                         space = thisSpace;
 772                         brk = thisBrk;
 773                 }
 774
 775                 // Remove blank/brk at start
 776                 if (paras.size() > 0
 777                                 && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
 778                                                 0).getType() == ParagraphType.BREAK)) {
 779                         paras.remove(0);
 780                 }
 781
 782                 // Remove blank/brk at end
 783                 int last = paras.size() - 1;
 784                 if (paras.size() > 0
 785                                 && (paras.get(last).getType() == ParagraphType.BLANK || paras
 786                                                 .get(last).getType() == ParagraphType.BREAK)) {
 787                         paras.remove(last);
 788                 }
 789         }
 790
 791         /**
 792          * Get the default cover related to this subject (see <tt>.info</tt> files).
 793          *
 794          * @param subject
 795          *            the subject
 796          *
 797          * @return the cover if any, or NULL
 798          */
 799         static BufferedImage getDefaultCover(String subject) {
 800                 if (subject != null && !subject.isEmpty()
 801                                 && Instance.getCoverDir() != null) {
 802                         try {
 803                                 File fileCover = new File(Instance.getCoverDir(), subject);
 804                                 return getImage(null, fileCover.toURI().toURL(), subject);
 805                         } catch (MalformedURLException e) {
 806                         }
 807                 }
 808
 809                 return null;
 810         }
 811
 812         /**
 813          * Return the list of supported image extensions.
 814          *
 815          * @param emptyAllowed
 816          *            TRUE to allow an empty extension on first place, which can be
 817          *            used when you may already have an extension in your input but
 818          *            are not sure about it
 819          *
 820          * @return the extensions
 821          */
 822         static String[] getImageExt(boolean emptyAllowed) {
 823                 if (emptyAllowed) {
 824                         return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 825                 } else {
 826                         return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 827                 }
 828         }
 829
 830         /**
 831          * Check if the given resource can be a local image or a remote image, then
 832          * refresh the cache with it if it is.
 833          *
 834          * @param source
 835          *            the story source
 836          * @param line
 837          *            the resource to check
 838          *
 839          * @return the image if found, or NULL
 840          *
 841          */
 842         static BufferedImage getImage(BasicSupport support, URL source, String line) {
 843                 URL url = getImageUrl(support, source, line);
 844                 if (url != null) {
 845                         InputStream in = null;
 846                         try {
 847                                 in = Instance.getCache().open(url, getSupport(url), true);
 848                                 return IOUtils.toImage(in);
 849                         } catch (IOException e) {
 850                         } finally {
 851                                 if (in != null) {
 852                                         try {
 853                                                 in.close();
 854                                         } catch (IOException e) {
 855                                         }
 856                                 }
 857                         }
 858                 }
 859
 860                 return null;
 861         }
 862
 863         /**
 864          * Check if the given resource can be a local image or a remote image, then
 865          * refresh the cache with it if it is.
 866          *
 867          * @param source
 868          *            the story source
 869          * @param line
 870          *            the resource to check
 871          *
 872          * @return the image URL if found, or NULL
 873          *
 874          */
 875         static URL getImageUrl(BasicSupport support, URL source, String line) {
 876                 URL url = null;
 877
 878                 if (line != null) {
 879                         // try for files
 880                         if (source != null) {
 881                                 try {
 882
 883                                         String relPath = null;
 884                                         String absPath = null;
 885                                         try {
 886                                                 String path = new File(source.getFile()).getParent();
 887                                                 relPath = new File(new File(path), line.trim())
 888                                                                 .getAbsolutePath();
 889                                         } catch (Exception e) {
 890                                                 // Cannot be converted to path (one possibility to take
 891                                                 // into account: absolute path on Windows)
 892                                         }
 893                                         try {
 894                                                 absPath = new File(line.trim()).getAbsolutePath();
 895                                         } catch (Exception e) {
 896                                                 // Cannot be converted to path (at all)
 897                                         }
 898
 899                                         for (String ext : getImageExt(true)) {
 900                                                 if (absPath != null && new File(absPath + ext).exists()) {
 901                                                         url = new File(absPath + ext).toURI().toURL();
 902                                                 } else if (relPath != null
 903                                                                 && new File(relPath + ext).exists()) {
 904                                                         url = new File(relPath + ext).toURI().toURL();
 905                                                 }
 906                                         }
 907                                 } catch (Exception e) {
 908                                         // Should not happen since we control the correct arguments
 909                                 }
 910                         }
 911
 912                         if (url == null) {
 913                                 // try for URLs
 914                                 try {
 915                                         for (String ext : getImageExt(true)) {
 916                                                 if (Instance.getCache().check(new URL(line + ext))) {
 917                                                         url = new URL(line + ext);
 918                                                         break;
 919                                                 }
 920                                         }
 921
 922                                         // try out of cache
 923                                         if (url == null) {
 924                                                 for (String ext : getImageExt(true)) {
 925                                                         try {
 926                                                                 url = new URL(line + ext);
 927                                                                 Instance.getCache().refresh(url, support, true);
 928                                                                 break;
 929                                                         } catch (IOException e) {
 930                                                                 // no image with this ext
 931                                                                 url = null;
 932                                                         }
 933                                                 }
 934                                         }
 935                                 } catch (MalformedURLException e) {
 936                                         // Not an url
 937                                 }
 938                         }
 939
 940                         // refresh the cached file
 941                         if (url != null) {
 942                                 try {
 943                                         Instance.getCache().refresh(url, support, true);
 944                                 } catch (IOException e) {
 945                                         // woops, broken image
 946                                         url = null;
 947                                 }
 948                         }
 949                 }
 950
 951                 return url;
 952         }
 953
 954         /**
 955          * Open the input file that will be used through the support.
 956          *
 957          * @param source
 958          *            the source {@link URL}
 959          *
 960          * @return the {@link InputStream}
 961          *
 962          * @throws IOException
 963          *             in case of I/O error
 964          */
 965         protected InputStream openInput(URL source) throws IOException {
 966                 return Instance.getCache().open(source, this, false);
 967         }
 968
 969         /**
 970          * Reset the given {@link InputStream} and return it.
 971          *
 972          * @param in
 973          *            the {@link InputStream} to reset
 974          *
 975          * @return the same {@link InputStream} after reset
 976          */
 977         protected InputStream reset(InputStream in) {
 978                 try {
 979                         in.reset();
 980                 } catch (IOException e) {
 981                 }
 982                 return in;
 983         }
 984
 985         /**
 986          * Reset then return {@link BasicSupport#in}.
 987          *
 988          * @return {@link BasicSupport#in}
 989          */
 990         protected InputStream getInput() {
 991                 return reset(in);
 992         }
 993
 994         /**
 995          * Fix the author name if it is prefixed with some "by" {@link String}.
 996          *
 997          * @param author
 998          *            the author with a possible prefix
 999          *
1000          * @return the author without prefixes
1001          */
1002         protected String fixAuthor(String author) {
1003                 if (author != null) {
1004                         for (String suffix : new String[] { " ", ":" }) {
1005                                 for (String byString : Instance.getConfig()
1006                                                 .getString(Config.BYS).split(",")) {
1007                                         byString += suffix;
1008                                         if (author.toUpperCase().startsWith(byString.toUpperCase())) {
1009                                                 author = author.substring(byString.length()).trim();
1010                                         }
1011                                 }
1012                         }
1013
1014                         // Special case (without suffix):
1015                         if (author.startsWith("©")) {
1016                                 author = author.substring(1);
1017                         }
1018                 }
1019
1020                 return author;
1021         }
1022
1023         /**
1024          * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
1025          * and requotify them (i.e., separate them into QUOTE paragraphs and other
1026          * paragraphs (quotes or not)).
1027          *
1028          * @param para
1029          *            the paragraph to requotify (not necessarily a quote)
1030          *
1031          * @return the correctly (or so we hope) quotified paragraphs
1032          */
1033         protected List<Paragraph> requotify(Paragraph para) {
1034                 List<Paragraph> newParas = new ArrayList<Paragraph>();
1035
1036                 if (para.getType() == ParagraphType.QUOTE
1037                                 && para.getContent().length() > 2) {
1038                         String line = para.getContent();
1039                         boolean singleQ = line.startsWith("" + openQuote);
1040                         boolean doubleQ = line.startsWith("" + openDoubleQuote);
1041
1042                         // Do not try when more than one quote at a time
1043                         // (some stories are not easily readable if we do)
1044                         if (singleQ
1045                                         && line.indexOf(closeQuote, 1) < line
1046                                                         .lastIndexOf(closeQuote)) {
1047                                 newParas.add(para);
1048                                 return newParas;
1049                         }
1050                         if (doubleQ
1051                                         && line.indexOf(closeDoubleQuote, 1) < line
1052                                                         .lastIndexOf(closeDoubleQuote)) {
1053                                 newParas.add(para);
1054                                 return newParas;
1055                         }
1056                         //
1057
1058                         if (!singleQ && !doubleQ) {
1059                                 line = openDoubleQuote + line + closeDoubleQuote;
1060                                 newParas.add(new Paragraph(ParagraphType.QUOTE, line, para
1061                                                 .getWords()));
1062                         } else {
1063                                 char open = singleQ ? openQuote : openDoubleQuote;
1064                                 char close = singleQ ? closeQuote : closeDoubleQuote;
1065
1066                                 int posDot = -1;
1067                                 boolean inQuote = false;
1068                                 int i = 0;
1069                                 for (char car : line.toCharArray()) {
1070                                         if (car == open) {
1071                                                 inQuote = true;
1072                                         } else if (car == close) {
1073                                                 inQuote = false;
1074                                         } else if (car == '.' && !inQuote) {
1075                                                 posDot = i;
1076                                                 break;
1077                                         }
1078                                         i++;
1079                                 }
1080
1081                                 if (posDot >= 0) {
1082                                         String rest = line.substring(posDot + 1).trim();
1083                                         line = line.substring(0, posDot + 1).trim();
1084                                         long words = 1;
1085                                         for (char car : line.toCharArray()) {
1086                                                 if (car == ' ') {
1087                                                         words++;
1088                                                 }
1089                                         }
1090                                         newParas.add(new Paragraph(ParagraphType.QUOTE, line, words));
1091                                         if (!rest.isEmpty()) {
1092                                                 newParas.addAll(requotify(processPara(rest)));
1093                                         }
1094                                 } else {
1095                                         newParas.add(para);
1096                                 }
1097                         }
1098                 } else {
1099                         newParas.add(para);
1100                 }
1101
1102                 return newParas;
1103         }
1104
1105         /**
1106          * Process a {@link Paragraph} from a raw line of text.
1107          * <p>
1108          * Will also fix quotes and HTML encoding if needed.
1109          *
1110          * @param line
1111          *            the raw line
1112          *
1113          * @return the processed {@link Paragraph}
1114          */
1115         protected Paragraph processPara(String line) {
1116                 line = ifUnhtml(line).trim();
1117
1118                 boolean space = true;
1119                 boolean brk = true;
1120                 boolean quote = false;
1121                 boolean tentativeCloseQuote = false;
1122                 char prev = '\0';
1123                 int dashCount = 0;
1124                 long words = 1;
1125
1126                 StringBuilder builder = new StringBuilder();
1127                 for (char car : line.toCharArray()) {
1128                         if (car != '-') {
1129                                 if (dashCount > 0) {
1130                                         // dash, ndash and mdash: - – —
1131                                         // currently: always use mdash
1132                                         builder.append(dashCount == 1 ? '-' : '—');
1133                                 }
1134                                 dashCount = 0;
1135                         }
1136
1137                         if (tentativeCloseQuote) {
1138                                 tentativeCloseQuote = false;
1139                                 if (Character.isLetterOrDigit(car)) {
1140                                         builder.append("'");
1141                                 } else {
1142                                         // handle double-single quotes as double quotes
1143                                         if (prev == car) {
1144                                                 builder.append(closeDoubleQuote);
1145                                                 continue;
1146                                         } else {
1147                                                 builder.append(closeQuote);
1148                                         }
1149                                 }
1150                         }
1151
1152                         switch (car) {
1153                         case ' ': // note: unbreakable space
1154                         case ' ':
1155                         case '\t':
1156                         case '\n': // just in case
1157                         case '\r': // just in case
1158                                 if (builder.length() > 0
1159                                                 && builder.charAt(builder.length() - 1) != ' ') {
1160                                         words++;
1161                                 }
1162                                 builder.append(' ');
1163                                 break;
1164
1165                         case '\'':
1166                                 if (space || (brk && quote)) {
1167                                         quote = true;
1168                                         // handle double-single quotes as double quotes
1169                                         if (prev == car) {
1170                                                 builder.deleteCharAt(builder.length() - 1);
1171                                                 builder.append(openDoubleQuote);
1172                                         } else {
1173                                                 builder.append(openQuote);
1174                                         }
1175                                 } else if (prev == ' ' || prev == car) {
1176                                         // handle double-single quotes as double quotes
1177                                         if (prev == car) {
1178                                                 builder.deleteCharAt(builder.length() - 1);
1179                                                 builder.append(openDoubleQuote);
1180                                         } else {
1181                                                 builder.append(openQuote);
1182                                         }
1183                                 } else {
1184                                         // it is a quote ("I'm off") or a 'quote' ("This
1185                                         // 'good' restaurant"...)
1186                                         tentativeCloseQuote = true;
1187                                 }
1188                                 break;
1189
1190                         case '"':
1191                                 if (space || (brk && quote)) {
1192                                         quote = true;
1193                                         builder.append(openDoubleQuote);
1194                                 } else if (prev == ' ') {
1195                                         builder.append(openDoubleQuote);
1196                                 } else {
1197                                         builder.append(closeDoubleQuote);
1198                                 }
1199                                 break;
1200
1201                         case '-':
1202                                 if (space) {
1203                                         quote = true;
1204                                 } else {
1205                                         dashCount++;
1206                                 }
1207                                 space = false;
1208                                 break;
1209
1210                         case '*':
1211                         case '~':
1212                         case '/':
1213                         case '\\':
1214                         case '<':
1215                         case '>':
1216                         case '=':
1217                         case '+':
1218                         case '_':
1219                         case '–':
1220                         case '—':
1221                                 space = false;
1222                                 builder.append(car);
1223                                 break;
1224
1225                         case '‘':
1226                         case '`':
1227                         case '‹':
1228                         case '﹁':
1229                         case '〈':
1230                         case '「':
1231                                 if (space || (brk && quote)) {
1232                                         quote = true;
1233                                         builder.append(openQuote);
1234                                 } else {
1235                                         // handle double-single quotes as double quotes
1236                                         if (prev == car) {
1237                                                 builder.deleteCharAt(builder.length() - 1);
1238                                                 builder.append(openDoubleQuote);
1239                                         } else {
1240                                                 builder.append(openQuote);
1241                                         }
1242                                 }
1243                                 space = false;
1244                                 brk = false;
1245                                 break;
1246
1247                         case '’':
1248                         case '›':
1249                         case '﹂':
1250                         case '〉':
1251                         case '」':
1252                                 space = false;
1253                                 brk = false;
1254                                 // handle double-single quotes as double quotes
1255                                 if (prev == car) {
1256                                         builder.deleteCharAt(builder.length() - 1);
1257                                         builder.append(closeDoubleQuote);
1258                                 } else {
1259                                         builder.append(closeQuote);
1260                                 }
1261                                 break;
1262
1263                         case '«':
1264                         case '“':
1265                         case '﹃':
1266                         case '《':
1267                         case '『':
1268                                 if (space || (brk && quote)) {
1269                                         quote = true;
1270                                         builder.append(openDoubleQuote);
1271                                 } else {
1272                                         builder.append(openDoubleQuote);
1273                                 }
1274                                 space = false;
1275                                 brk = false;
1276                                 break;
1277
1278                         case '»':
1279                         case '”':
1280                         case '﹄':
1281                         case '》':
1282                         case '』':
1283                                 space = false;
1284                                 brk = false;
1285                                 builder.append(closeDoubleQuote);
1286                                 break;
1287
1288                         default:
1289                                 space = false;
1290                                 brk = false;
1291                                 builder.append(car);
1292                                 break;
1293                         }
1294
1295                         prev = car;
1296                 }
1297
1298                 if (tentativeCloseQuote) {
1299                         tentativeCloseQuote = false;
1300                         builder.append(closeQuote);
1301                 }
1302
1303                 line = builder.toString().trim();
1304
1305                 ParagraphType type = ParagraphType.NORMAL;
1306                 if (space) {
1307                         type = ParagraphType.BLANK;
1308                 } else if (brk) {
1309                         type = ParagraphType.BREAK;
1310                 } else if (quote) {
1311                         type = ParagraphType.QUOTE;
1312                 }
1313
1314                 return new Paragraph(type, line, words);
1315         }
1316
1317         /**
1318          * Remove the HTML from the input <b>if</b> {@link BasicSupport#isHtml()} is
1319          * true.
1320          *
1321          * @param input
1322          *            the input
1323          *
1324          * @return the no html version if needed
1325          */
1326         private String ifUnhtml(String input) {
1327                 if (isHtml() && input != null) {
1328                         return StringUtils.unhtml(input);
1329                 }
1330
1331                 return input;
1332         }
1333
1334         /**
1335          * Return a {@link BasicSupport} implementation supporting the given
1336          * resource if possible.
1337          *
1338          * @param url
1339          *            the story resource
1340          *
1341          * @return an implementation that supports it, or NULL
1342          */
1343         public static BasicSupport getSupport(URL url) {
1344                 if (url == null) {
1345                         return null;
1346                 }
1347
1348                 // TEXT and INFO_TEXT always support files (not URLs though)
1349                 for (SupportType type : SupportType.values()) {
1350                         if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1351                                 BasicSupport support = getSupport(type);
1352                                 if (support != null && support.supports(url)) {
1353                                         return support;
1354                                 }
1355                         }
1356                 }
1357
1358                 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
1359                                 SupportType.TEXT }) {
1360                         BasicSupport support = getSupport(type);
1361                         if (support != null && support.supports(url)) {
1362                                 return support;
1363                         }
1364                 }
1365
1366                 return null;
1367         }
1368
1369         /**
1370          * Return a {@link BasicSupport} implementation supporting the given type.
1371          *
1372          * @param type
1373          *            the type
1374          *
1375          * @return an implementation that supports it, or NULL
1376          */
1377         public static BasicSupport getSupport(SupportType type) {
1378                 switch (type) {
1379                 case EPUB:
1380                         return new Epub().setType(type);
1381                 case INFO_TEXT:
1382                         return new InfoText().setType(type);
1383                 case FIMFICTION:
1384                         return new Fimfiction().setType(type);
1385                 case FANFICTION:
1386                         return new Fanfiction().setType(type);
1387                 case TEXT:
1388                         return new Text().setType(type);
1389                 case MANGAFOX:
1390                         return new MangaFox().setType(type);
1391                 case E621:
1392                         return new E621().setType(type);
1393                 case YIFFSTAR:
1394                         return new YiffStar().setType(type);
1395                 case E_HENTAI:
1396                         return new EHentai().setType(type);
1397                 case CBZ:
1398                         return new Cbz().setType(type);
1399                 case HTML:
1400                         return new Html().setType(type);
1401                 }
1402
1403                 return null;
1404         }
1405
1406         /**
1407          * Return the first line from the given input which correspond to the given
1408          * selectors.
1409          *
1410          * @param in
1411          *            the input
1412          * @param needle
1413          *            a string that must be found inside the target line (also
1414          *            supports "^" at start to say "only if it starts with" the
1415          *            needle)
1416          * @param relativeLine
1417          *            the line to return based upon the target line position (-1 =
1418          *            the line before, 0 = the target line...)
1419          *
1420          * @return the line
1421          */
1422         static String getLine(InputStream in, String needle, int relativeLine) {
1423                 return getLine(in, needle, relativeLine, true);
1424         }
1425
1426         /**
1427          * Return a line from the given input which correspond to the given
1428          * selectors.
1429          *
1430          * @param in
1431          *            the input
1432          * @param needle
1433          *            a string that must be found inside the target line (also
1434          *            supports "^" at start to say "only if it starts with" the
1435          *            needle)
1436          * @param relativeLine
1437          *            the line to return based upon the target line position (-1 =
1438          *            the line before, 0 = the target line...)
1439          * @param first
1440          *            takes the first result (as opposed to the last one, which will
1441          *            also always spend the input)
1442          *
1443          * @return the line
1444          */
1445         static String getLine(InputStream in, String needle, int relativeLine,
1446                         boolean first) {
1447                 String rep = null;
1448
1449                 try {
1450                         in.reset();
1451                 } catch (IOException e) {
1452                         Instance.syserr(e);
1453                 }
1454
1455                 List<String> lines = new ArrayList<String>();
1456                 @SuppressWarnings("resource")
1457                 Scanner scan = new Scanner(in, "UTF-8");
1458                 int index = -1;
1459                 scan.useDelimiter("\\n");
1460                 while (scan.hasNext()) {
1461                         lines.add(scan.next());
1462
1463                         if (index == -1) {
1464                                 if (needle.startsWith("^")) {
1465                                         if (lines.get(lines.size() - 1).startsWith(
1466                                                         needle.substring(1))) {
1467                                                 index = lines.size() - 1;
1468                                         }
1469
1470                                 } else {
1471                                         if (lines.get(lines.size() - 1).contains(needle)) {
1472                                                 index = lines.size() - 1;
1473                                         }
1474                                 }
1475                         }
1476
1477                         if (index >= 0 && index + relativeLine < lines.size()) {
1478                                 rep = lines.get(index + relativeLine);
1479                                 if (first) {
1480                                         break;
1481                                 }
1482                         }
1483                 }
1484
1485                 return rep;
1486         }
1487
1488         /**
1489          * Return the text between the key and the endKey (and optional subKey can
1490          * be passed, in this case we will look for the key first, then take the
1491          * text between the subKey and the endKey).
1492          * <p>
1493          * Will only match the first line with the given key if more than one are
1494          * possible. Which also means that if the subKey or endKey is not found on
1495          * that line, NULL will be returned.
1496          *
1497          * @param in
1498          *            the input
1499          * @param key
1500          *            the key to match (also supports "^" at start to say
1501          *            "only if it starts with" the key)
1502          * @param subKey
1503          *            the sub key or NULL if none
1504          * @param endKey
1505          *            the end key or NULL for "up to the end"
1506          * @return the text or NULL if not found
1507          */
1508         static String getKeyLine(InputStream in, String key, String subKey,
1509                         String endKey) {
1510                 String result = null;
1511
1512                 String line = getLine(in, key, 0);
1513                 if (line != null && line.contains(key)) {
1514                         line = line.substring(line.indexOf(key) + key.length());
1515                         if (subKey == null || subKey.isEmpty() || line.contains(subKey)) {
1516                                 if (subKey != null) {
1517                                         line = line.substring(line.indexOf(subKey)
1518                                                         + subKey.length());
1519                                 }
1520                                 if (endKey == null || line.contains(endKey)) {
1521                                         if (endKey != null) {
1522                                                 line = line.substring(0, line.indexOf(endKey));
1523                                                 result = line;
1524                                         }
1525                                 }
1526                         }
1527                 }
1528
1529                 return result;
1530         }
1531 }