src/be/nikiroo/fanfix/supported/BasicSupport.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.io.ByteArrayInputStream;
   4 import java.io.File;
   5 import java.io.IOException;
   6 import java.io.InputStream;
   7 import java.net.MalformedURLException;
   8 import java.net.URL;
   9 import java.nio.charset.StandardCharsets;
  10 import java.util.ArrayList;
  11 import java.util.HashMap;
  12 import java.util.List;
  13 import java.util.Map;
  14 import java.util.Map.Entry;
  15 import java.util.Scanner;
  16
  17 import be.nikiroo.fanfix.Instance;
  18 import be.nikiroo.fanfix.bundles.Config;
  19 import be.nikiroo.fanfix.bundles.StringId;
  20 import be.nikiroo.fanfix.data.Chapter;
  21 import be.nikiroo.fanfix.data.MetaData;
  22 import be.nikiroo.fanfix.data.Paragraph;
  23 import be.nikiroo.fanfix.data.Story;
  24 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
  25 import be.nikiroo.utils.StringUtils;
  26
  27 /**
  28  * This class is the base class used by the other support classes. It can be
   29  * used outside of this package, and has static methods that you can use to
   30  * get access to the correct support class.
  31  * <p>
  32  * It will be used with 'resources' (usually web pages or files).
  33  *
  34  * @author niki
  35  */
  36 public abstract class BasicSupport {
  37         /**
  38          * The supported input types for which we can get a {@link BasicSupport}
  39          * object.
  40          *
  41          * @author niki
  42          */
  43         public enum SupportType {
  44                 /** EPUB files created with this program */
  45                 EPUB,
  46                 /** Pure text file with some rules */
  47                 TEXT,
  48                 /** TEXT but with associated .info file */
  49                 INFO_TEXT,
  50                 /** My Little Pony fanfictions */
  51                 FIMFICTION,
  52                 /** Fanfictions from a lot of different universes */
  53                 FANFICTION,
  54                 /** Website with lots of Mangas */
  55                 MANGAFOX,
  56                 /** Furry website with comics support */
  57                 E621,
  58                 /** CBZ files */
  59                 CBZ;
  60
  61                 /**
  62                  * A description of this support type (more information than the
  63                  * {@link BasicSupport#getSourceName()}).
  64                  *
  65                  * @return the description
  66                  */
  67                 public String getDesc() {
  68                         String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
  69                                         this.name());
  70
  71                         if (desc == null) {
  72                                 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
  73                         }
  74
  75                         return desc;
  76                 }
  77
  78                 /**
  79                  * The name of this support type (a short version).
  80                  *
  81                  * @return the name
  82                  */
  83                 public String getSourceName() {
  84                         BasicSupport support = BasicSupport.getSupport(this);
  85                         if (support != null) {
  86                                 return support.getSourceName();
  87                         }
  88
  89                         return null;
  90                 }
  91
  92                 @Override
  93                 public String toString() {
  94                         return super.toString().toLowerCase();
  95                 }
  96
  97                 /**
  98                  * Call {@link SupportType#valueOf(String.toUpperCase())}.
  99                  *
 100                  * @param typeName
 101                  *            the possible type name
 102                  *
 103                  * @return NULL or the type
 104                  */
 105                 public static SupportType valueOfUC(String typeName) {
 106                         return SupportType.valueOf(typeName == null ? null : typeName
 107                                         .toUpperCase());
 108                 }
 109
 110                 /**
 111                  * Call {@link SupportType#valueOf(String.toUpperCase())} but return
 112                  * NULL for NULL instead of raising exception.
 113                  *
 114                  * @param typeName
 115                  *            the possible type name
 116                  *
 117                  * @return NULL or the type
 118                  */
 119                 public static SupportType valueOfNullOkUC(String typeName) {
 120                         if (typeName == null) {
 121                                 return null;
 122                         }
 123
 124                         return SupportType.valueOfUC(typeName);
 125                 }
 126
 127                 /**
 128                  * Call {@link SupportType#valueOf(String.toUpperCase())} but return
 129                  * NULL in case of error instead of raising an exception.
 130                  *
 131                  * @param typeName
 132                  *            the possible type name
 133                  *
 134                  * @return NULL or the type
 135                  */
 136                 public static SupportType valueOfAllOkUC(String typeName) {
 137                         try {
 138                                 return SupportType.valueOfUC(typeName);
 139                         } catch (Exception e) {
 140                                 return null;
 141                         }
 142                 }
 143         }
 144
  145         /** Only used by {@link BasicSupport#getInput()} just so it is always reset. */
  146         private InputStream in;
              /** The support type, as set by {@link BasicSupport#setType(SupportType)}. */
  147         private SupportType type;
              /** The {@link URL} currently worked on, see {@link BasicSupport#getCurrentReferer()}. */
  148         private URL currentReferer; // with one 'r', as in 'HTTP'...
  149
  150         // quote chars, looked up through Instance.getTrans() when the instance is created
  151         private char openQuote = Instance.getTrans().getChar(
  152                         StringId.OPEN_SINGLE_QUOTE);
  153         private char closeQuote = Instance.getTrans().getChar(
  154                         StringId.CLOSE_SINGLE_QUOTE);
  155         private char openDoubleQuote = Instance.getTrans().getChar(
  156                         StringId.OPEN_DOUBLE_QUOTE);
  157         private char closeDoubleQuote = Instance.getTrans().getChar(
  158                         StringId.CLOSE_DOUBLE_QUOTE);
 159
  160         /**
  161          * The name of this support class.
               * <p>
               * It is stored as the source of the story by
               * {@link BasicSupport#processMeta(URL, boolean, boolean)} and is the
               * default value of {@link BasicSupport#getPublisher(URL, InputStream)}.
  162          *
  163          * @return the name
  164          */
  165         protected abstract String getSourceName();
 166
  167         /**
  168          * Check if the given resource is supported by this {@link BasicSupport}.
  169          *
  170          * @param url
  171          *            the resource to check for
  172          *
  173          * @return TRUE if this {@link BasicSupport} supports the resource
  174          */
  175         protected abstract boolean supports(URL url);
 176
  177         /**
  178          * Return TRUE if the support will return HTML encoded content values for
  179          * the chapters content.
               * <p>
               * When TRUE, {@link BasicSupport#makeChapter(URL, int, String, String)}
               * replaces the {@code <hr>} tags of the content by a "* * *" line before
               * splitting it into paragraphs.
  180          *
  181          * @return TRUE for HTML
  182          */
  183         protected abstract boolean isHtml();
 184
  185         /**
  186          * Return the story title.
               * <p>
               * Called by {@link BasicSupport#processMeta(URL, boolean, boolean)}, which
               * passes the result through {@code ifUnhtml} before storing it in the
               * metadata.
  187          *
  188          * @param source
  189          *            the source of the story
  190          * @param in
  191          *            the input (the main resource)
  192          *
  193          * @return the title
  194          *
  195          * @throws IOException
  196          *             in case of I/O error
  197          */
  198         protected abstract String getTitle(URL source, InputStream in)
  199                         throws IOException;
 200
  201         /**
  202          * Return the story author.
               * <p>
               * Called by {@link BasicSupport#processMeta(URL, boolean, boolean)}, which
               * passes the result through {@code ifUnhtml} then {@code fixAuthor} before
               * storing it in the metadata.
  203          *
  204          * @param source
  205          *            the source of the story
  206          * @param in
  207          *            the input (the main resource)
  208          *
  209          * @return the author
  210          *
  211          * @throws IOException
  212          *             in case of I/O error
  213          */
  214         protected abstract String getAuthor(URL source, InputStream in)
  215                         throws IOException;
 216
  217         /**
  218          * Return the story publication date.
               * <p>
               * The date is handled as a plain {@link String}:
               * {@link BasicSupport#processMeta(URL, boolean, boolean)} only passes it
               * through {@code ifUnhtml} before storing it in the metadata.
  219          *
  220          * @param source
  221          *            the source of the story
  222          * @param in
  223          *            the input (the main resource)
  224          *
  225          * @return the date
  226          *
  227          * @throws IOException
  228          *             in case of I/O error
  229          */
  230         protected abstract String getDate(URL source, InputStream in)
  231                         throws IOException;
 232
  233         /**
  234          * Return the subject of the story (for instance, if it is a fanfiction,
  235          * what is the original work; if it is a technical text, what is the
  236          * technical subject...).
               * <p>
               * When the story has no cover of its own, {@link BasicSupport#process(URL)}
               * also uses the subject as the name of the default cover to look for in
               * the cover directory.
  237          *
  238          * @param source
  239          *            the source of the story
  240          * @param in
  241          *            the input (the main resource)
  242          *
  243          * @return the subject
  244          *
  245          * @throws IOException
  246          *             in case of I/O error
  247          */
  248         protected abstract String getSubject(URL source, InputStream in)
  249                         throws IOException;
 250
  251         /**
  252          * Return the story description.
               * <p>
               * Only called by {@link BasicSupport#processMeta(URL, boolean, boolean)}
               * when the description is requested; the result is then converted into
               * a {@link Chapter} (number 0) and stored as the resume of the story.
  253          *
  254          * @param source
  255          *            the source of the story
  256          * @param in
  257          *            the input (the main resource)
  258          *
  259          * @return the description
  260          *
  261          * @throws IOException
  262          *             in case of I/O error
  263          */
  264         protected abstract String getDesc(URL source, InputStream in)
  265                         throws IOException;
 266
  267         /**
  268          * Return the story cover resource if any, or NULL if none.
  269          * <p>
  270          * The default cover should not be checked for here.
               * ({@link BasicSupport#process(URL)} looks for a default cover itself,
               * based upon the subject of the story, when NULL is returned.)
  271          *
  272          * @param source
  273          *            the source of the story
  274          * @param in
  275          *            the input (the main resource)
  276          *
  277          * @return the cover or NULL
  278          *
  279          * @throws IOException
  280          *             in case of I/O error
  281          */
  282         protected abstract URL getCover(URL source, InputStream in)
  283                         throws IOException;
 284
  285         /**
  286          * Return the list of chapters (name and resource).
               * <p>
               * For each entry, the key is the chapter name and the value is the
               * resource that {@link BasicSupport#process(URL)} will open to read the
               * chapter content, in list order (the first entry is chapter 1).
  287          *
  288          * @param source
  289          *            the source of the story
  290          * @param in
  291          *            the input (the main resource)
  292          *
  293          * @return the chapters (NULL is accepted and means no chapters)
  294          *
  295          * @throws IOException
  296          *             in case of I/O error
  297          */
  298         protected abstract List<Entry<String, URL>> getChapters(URL source,
  299                         InputStream in) throws IOException;
 300
  301         /**
  302          * Return the content of the chapter (possibly HTML encoded, if
  303          * {@link BasicSupport#isHtml()} is TRUE).
  304          *
  305          * @param source
  306          *            the source of the story
  307          * @param in
  308          *            the input; {@link BasicSupport#process(URL)} passes the stream
               *            opened on the resource of this chapter (as returned by
               *            {@link BasicSupport#getChapters(URL, InputStream)}), not the
               *            main resource
  309          * @param number
  310          *            the chapter number (the first chapter is number 1)
  311          *
  312          * @return the content
  313          *
  314          * @throws IOException
  315          *             in case of I/O error
  316          */
  317         protected abstract String getChapterContent(URL source, InputStream in,
  318                         int number) throws IOException;
 319
  320         /**
  321          * Check if this {@link BasicSupport} is mainly catered to image files.
               * <p>
               * The default implementation ignores its parameters and always returns
               * FALSE.
  322          *
               * @param source
               *            the source of the story
               * @param in
               *            the input (the main resource)
               *
  323          * @return TRUE if it is
               *
               * @throws IOException
               *             in case of I/O error (never thrown by the default
               *             implementation)
  324          */
  325         public boolean isImageDocument(URL source, InputStream in)
  326                         throws IOException {
  327                 return false;
  328         }
 329
 330         /**
 331          * Return the list of cookies (values included) that must be used to
 332          * correctly fetch the resources.
 333          * <p>
 334          * You are expected to call the super method implementation if you override
 335          * it.
 336          *
 337          * @return the cookies
 338          */
 339         public Map<String, String> getCookies() {
 340                 return new HashMap<String, String>();
 341         }
 342
 343         /**
 344          * Process the given story resource into a partially filled {@link Story}
 345          * object containing the name and metadata, except for the description.
 346          *
 347          * @param url
 348          *            the story resource
 349          *
 350          * @return the {@link Story}
 351          *
 352          * @throws IOException
 353          *             in case of I/O error
 354          */
 355         public Story processMeta(URL url) throws IOException {
 356                 return processMeta(url, true, false);
 357         }
 358
 359         /**
 360          * Process the given story resource into a partially filled {@link Story}
 361          * object containing the name and metadata.
 362          *
 363          * @param url
 364          *            the story resource
 365          *
 366          * @param close
 367          *            close "this" and "in" when done
 368          *
 369          * @return the {@link Story}
 370          *
 371          * @throws IOException
 372          *             in case of I/O error
 373          */
 374         protected Story processMeta(URL url, boolean close, boolean getDesc)
 375                         throws IOException {
 376                 in = Instance.getCache().open(url, this, false);
 377                 if (in == null) {
 378                         return null;
 379                 }
 380
 381                 try {
 382                         preprocess(getInput());
 383
 384                         Story story = new Story();
 385                         story.setMeta(new MetaData());
 386                         story.getMeta().setTitle(ifUnhtml(getTitle(url, getInput())));
 387                         story.getMeta().setAuthor(
 388                                         fixAuthor(ifUnhtml(getAuthor(url, getInput()))));
 389                         story.getMeta().setDate(ifUnhtml(getDate(url, getInput())));
 390                         story.getMeta().setTags(getTags(url, getInput()));
 391                         story.getMeta().setSource(getSourceName());
 392                         story.getMeta().setPublisher(
 393                                         ifUnhtml(getPublisher(url, getInput())));
 394                         story.getMeta().setUuid(getUuid(url, getInput()));
 395                         story.getMeta().setLuid(getLuid(url, getInput()));
 396                         story.getMeta().setLang(getLang(url, getInput()));
 397                         story.getMeta().setSubject(ifUnhtml(getSubject(url, getInput())));
 398                         story.getMeta().setImageDocument(isImageDocument(url, getInput()));
 399
 400                         if (getDesc) {
 401                                 String descChapterName = Instance.getTrans().getString(
 402                                                 StringId.DESCRIPTION);
 403                                 story.getMeta().setResume(
 404                                                 makeChapter(url, 0, descChapterName,
 405                                                                 getDesc(url, getInput())));
 406                         }
 407
 408                         return story;
 409                 } finally {
 410                         if (close) {
 411                                 try {
 412                                         close();
 413                                 } catch (IOException e) {
 414                                         Instance.syserr(e);
 415                                 }
 416
 417                                 if (in != null) {
 418                                         in.close();
 419                                 }
 420                         }
 421                 }
 422         }
 423
 424         /**
 425          * Process the given story resource into a fully filled {@link Story}
 426          * object.
 427          *
 428          * @param url
 429          *            the story resource
 430          *
 431          * @return the {@link Story}
 432          *
 433          * @throws IOException
 434          *             in case of I/O error
 435          */
 436         public Story process(URL url) throws IOException {
 437                 setCurrentReferer(url);
 438
 439                 try {
 440                         Story story = processMeta(url, false, true);
 441                         if (story == null) {
 442                                 return null;
 443                         }
 444
 445                         story.setChapters(new ArrayList<Chapter>());
 446
 447                         URL cover = getCover(url, getInput());
 448                         if (cover == null) {
 449                                 String subject = story.getMeta() == null ? null : story
 450                                                 .getMeta().getSubject();
 451                                 if (subject != null && !subject.isEmpty()
 452                                                 && Instance.getCoverDir() != null) {
 453                                         File fileCover = new File(Instance.getCoverDir(), subject);
 454                                         cover = getImage(fileCover.toURI().toURL(), subject);
 455                                 }
 456                         }
 457
 458                         if (cover != null) {
 459                                 InputStream coverIn = null;
 460                                 try {
 461                                         coverIn = Instance.getCache().open(cover, this, true);
 462                                         story.getMeta().setCover(StringUtils.toImage(coverIn));
 463                                 } catch (IOException e) {
 464                                         Instance.syserr(new IOException(Instance.getTrans()
 465                                                         .getString(StringId.ERR_BS_NO_COVER, cover), e));
 466                                 } finally {
 467                                         if (coverIn != null)
 468                                                 coverIn.close();
 469                                 }
 470                         }
 471
 472                         List<Entry<String, URL>> chapters = getChapters(url, getInput());
 473                         int i = 1;
 474                         if (chapters != null) {
 475                                 for (Entry<String, URL> chap : chapters) {
 476                                         setCurrentReferer(chap.getValue());
 477                                         InputStream chapIn = Instance.getCache().open(
 478                                                         chap.getValue(), this, true);
 479                                         try {
 480                                                 story.getChapters().add(
 481                                                                 makeChapter(url, i, chap.getKey(),
 482                                                                                 getChapterContent(url, chapIn, i)));
 483                                         } finally {
 484                                                 chapIn.close();
 485                                         }
 486                                         i++;
 487                                 }
 488                         }
 489
 490                         return story;
 491
 492                 } finally {
 493                         try {
 494                                 close();
 495                         } catch (IOException e) {
 496                                 Instance.syserr(e);
 497                         }
 498
 499                         if (in != null) {
 500                                 in.close();
 501                         }
 502
 503                         currentReferer = null;
 504                 }
 505         }
 506
 507         /**
 508          * The support type.$
 509          *
 510          * @return the type
 511          */
 512         public SupportType getType() {
 513                 return type;
 514         }
 515
 516         /**
 517          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 518          * the current {@link URL} we work on.
 519          *
 520          * @return the referer
 521          */
 522         public URL getCurrentReferer() {
 523                 return currentReferer;
 524         }
 525
  526         /**
  527          * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
  528          * the current {@link URL} we work on.
               * <p>
               * {@link BasicSupport#process(URL)} sets it to the story resource, then
               * to each chapter resource in turn.
  529          *
  530          * @param currentReferer
  531          *            the new referer
  532          */
  533         protected void setCurrentReferer(URL currentReferer) {
  534                 this.currentReferer = currentReferer;
  535         }
 536
  537         /**
  538          * The support type.
               * <p>
               * The instance itself is returned so the call can be chained.
  539          *
  540          * @param type
  541          *            the new type
  542          *
  543          * @return this
  544          */
  545         protected BasicSupport setType(SupportType type) {
  546                 this.type = type;
  547                 return this;
  548         }
 549
  550         /**
  551          * Return the story publisher (by default,
  552          * {@link BasicSupport#getSourceName()}).
               * <p>
               * The default implementation does not use its parameters.
  553          *
  554          * @param source
  555          *            the source of the story
  556          * @param in
  557          *            the input (the main resource)
  558          *
  559          * @return the publisher
  560          *
  561          * @throws IOException
  562          *             in case of I/O error
  563          */
  564         protected String getPublisher(URL source, InputStream in)
  565                         throws IOException {
  566                 return getSourceName();
  567         }
 568
 569         /**
 570          * Return the story UUID, a unique value representing the story (it is often
 571          * an URL).
 572          * <p>
 573          * By default, this is the {@link URL} of the resource.
 574          *
 575          * @param source
 576          *            the source of the story
 577          * @param in
 578          *            the input (the main resource)
 579          *
 580          * @return the uuid
 581          *
 582          * @throws IOException
 583          *             in case of I/O error
 584          */
 585         protected String getUuid(URL source, InputStream in) throws IOException {
 586                 return source.toString();
 587         }
 588
  589         /**
  590          * Return the story Library UID, a unique value representing the story (it
  591          * is often a number) in the local library.
  592          * <p>
  593          * By default, this is empty (an empty {@link String}, not NULL).
  594          *
  595          * @param source
  596          *            the source of the story
  597          * @param in
  598          *            the input (the main resource)
  599          *
  600          * @return the id
  601          *
  602          * @throws IOException
  603          *             in case of I/O error
  604          */
  605         protected String getLuid(URL source, InputStream in) throws IOException {
  606                 return "";
  607         }
 608
  609         /**
  610          * Return the 2-letter language code of this story.
  611          * <p>
  612          * By default, this is 'EN' (whatever the source and the input are).
  613          *
  614          * @param source
  615          *            the source of the story
  616          * @param in
  617          *            the input (the main resource)
  618          *
  619          * @return the language
  620          *
  621          * @throws IOException
  622          *             in case of I/O error
  623          */
  624         protected String getLang(URL source, InputStream in) throws IOException {
  625                 return "EN";
  626         }
 627
 628         /**
 629          * Return the list of tags for this story.
 630          *
 631          * @param source
 632          *            the source of the story
 633          * @param in
 634          *            the input (the main resource)
 635          *
 636          * @return the tags
 637          *
 638          * @throws IOException
 639          *             in case of I/O error
 640          */
 641         protected List<String> getTags(URL source, InputStream in)
 642                         throws IOException {
 643                 return new ArrayList<String>();
 644         }
 645
 646         /**
 647          * Return the first line from the given input which correspond to the given
 648          * selectors.
 649          * <p>
 650          * Do not reset the input, which will be pointing at the line just after the
 651          * result (input will be spent if no result is found).
 652          *
 653          * @param in
 654          *            the input
 655          * @param needle
 656          *            a string that must be found inside the target line (also
 657          *            supports "^" at start to say "only if it starts with" the
 658          *            needle)
 659          * @param relativeLine
 660          *            the line to return based upon the target line position (-1 =
 661          *            the line before, 0 = the target line...)
 662          *
 663          * @return the line
 664          */
 665         protected String getLine(InputStream in, String needle, int relativeLine) {
 666                 return getLine(in, needle, relativeLine, true);
 667         }
 668
 669         /**
 670          * Return a line from the given input which correspond to the given
 671          * selectors.
 672          * <p>
 673          * Do not reset the input, which will be pointing at the line just after the
 674          * result (input will be spent if no result is found) when first is TRUE,
 675          * and will always be spent if first is FALSE.
 676          *
 677          * @param in
 678          *            the input
 679          * @param needle
 680          *            a string that must be found inside the target line (also
 681          *            supports "^" at start to say "only if it starts with" the
 682          *            needle)
 683          * @param relativeLine
 684          *            the line to return based upon the target line position (-1 =
 685          *            the line before, 0 = the target line...)
 686          * @param first
 687          *            takes the first result (as opposed to the last one, which will
 688          *            also always spend the input)
 689          *
 690          * @return the line
 691          */
 692         protected String getLine(InputStream in, String needle, int relativeLine,
 693                         boolean first) {
 694                 String rep = null;
 695
 696                 List<String> lines = new ArrayList<String>();
 697                 @SuppressWarnings("resource")
 698                 Scanner scan = new Scanner(in, "UTF-8");
 699                 int index = -1;
 700                 scan.useDelimiter("\\n");
 701                 while (scan.hasNext()) {
 702                         lines.add(scan.next());
 703
 704                         if (index == -1) {
 705                                 if (needle.startsWith("^")) {
 706                                         if (lines.get(lines.size() - 1).startsWith(
 707                                                         needle.substring(1))) {
 708                                                 index = lines.size() - 1;
 709                                         }
 710
 711                                 } else {
 712                                         if (lines.get(lines.size() - 1).contains(needle)) {
 713                                                 index = lines.size() - 1;
 714                                         }
 715                                 }
 716                         }
 717
 718                         if (index >= 0 && index + relativeLine < lines.size()) {
 719                                 rep = lines.get(index + relativeLine);
 720                                 if (first) {
 721                                         break;
 722                                 }
 723                         }
 724                 }
 725
 726                 return rep;
 727         }
 728
  729         /**
  730          * Prepare the support if needed before processing.
               * <p>
               * Called by {@link BasicSupport#processMeta(URL, boolean, boolean)} right
               * after the main resource has been opened and before any information is
               * read from it; the default implementation does nothing.
  731          *
               * @param in
               *            the input (the main resource)
               *
  732          * @throws IOException
  733          *             on I/O error
  734          */
  735         protected void preprocess(InputStream in) throws IOException {
  736         }
 737
  738         /**
  739          * Now that we have processed the {@link Story}, close the resources if any.
               * <p>
               * The default implementation does nothing; the main input is closed by
               * the callers themselves, and an {@link IOException} thrown here is
               * reported by them through {@code Instance.syserr}, not propagated.
  740          *
  741          * @throws IOException
  742          *             on I/O error
  743          */
  744         protected void close() throws IOException {
  745         }
 746
 747         /**
 748          * Create a {@link Chapter} object from the given information, formatting
 749          * the content as it should be.
 750          *
 751          * @param source
 752          *            the story source, used to look for the images of the chapter
 753          * @param number
 754          *            the chapter number
 755          * @param name
 756          *            the chapter name
 757          * @param content
 758          *            the chapter content (can be NULL: no paragraph is then set)
 759          *
 760          * @return the {@link Chapter}
 761          *
 762          * @throws IOException
 763          *             in case of I/O error
 764          */
 765         protected Chapter makeChapter(URL source, int number, String name,
 766                         String content) throws IOException {
 767                 // Chapter name: process it correctly, then remove the possible
 768                 // redundant "Chapter x: " in front of it
 769                 String chapterName = processPara(name).getContent().trim();
 770                 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
 771                                 .split(",")) {
 772                         String chapterWord = Instance.getConfig().getStringX(
 773                                         Config.CHAPTER, lang);
 774                         if (chapterName.startsWith(chapterWord)) {
 775                                 chapterName = chapterName.substring(chapterWord.length())
 776                                                 .trim();
 777                                 break;
 778                         }
 779                 }
 780
 781                 if (chapterName.startsWith(Integer.toString(number))) {
 782                         chapterName = chapterName.substring(
 783                                         Integer.toString(number).length()).trim();
 784                 }
 785
 786                 if (chapterName.startsWith(":")) {
 787                         chapterName = chapterName.substring(1).trim();
 788                 }
 789
 790                 Chapter chap = new Chapter(number, chapterName);
 791
 792                 if (content == null) {
 793                         return chap;
 794                 }
 795
 796                 if (isHtml()) {
 797                         // Special <HR> processing:
 798                         content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
 799                                         "\n* * *\n");
 800                 }
 801
 802                 InputStream in = new ByteArrayInputStream(
 803                                 content.getBytes(StandardCharsets.UTF_8));
 804                 try {
 805                         @SuppressWarnings("resource")
 806                         Scanner scan = new Scanner(in, "UTF-8"); // not closed: 'in' is closed below
 807                         scan.useDelimiter("(\\n|</p>)"); // \n for test, </p> for html
 808
 809                         List<Paragraph> paras = new ArrayList<Paragraph>();
 810                         while (scan.hasNext()) {
 811                                 String line = scan.next().trim();
 812                                 boolean image = false;
 813                                 if (line.startsWith("[") && line.endsWith("]")) {
 814                                         URL url = getImage(source,
 815                                                         line.substring(1, line.length() - 1).trim());
 816                                         if (url != null) {
 817                                                 paras.add(new Paragraph(url));
 818                                                 image = true;
 819                                         }
 820                                 }
 821
 822                                 if (!image) {
 823                                         paras.add(processPara(line));
 824                                 }
 825                         }
 826
 827                         // Check quotes for "bad" format
 828                         List<Paragraph> newParas = new ArrayList<Paragraph>();
 829                         for (Paragraph para : paras) {
 830                                 newParas.addAll(requotify(para));
 831                         }
 832                         paras = newParas;
 833
 834                         // Remove double blanks/brks
 835                         boolean space = false;
 836                         boolean brk = true; // so that a leading blank/brk is removed too
 837                         for (int i = 0; i < paras.size(); i++) {
 838                                 Paragraph para = paras.get(i);
 839                                 boolean thisSpace = para.getType() == ParagraphType.BLANK;
 840                                 boolean thisBrk = para.getType() == ParagraphType.BREAK;
 841                                 // i > 0: at the head of the list, the flagged blank is already gone
 842                                 if (i > 0 && space && thisBrk) {
 843                                         paras.remove(i - 1);
 844                                         i--;
 845                                 } else if ((space || brk) && (thisSpace || thisBrk)) {
 846                                         paras.remove(i);
 847                                         i--;
 848                                 }
 849
 850                                 space = thisSpace;
 851                                 brk = thisBrk;
 852                         }
 853
 854                         // Remove blank/brk at start
 855                         if (paras.size() > 0
 856                                         && (paras.get(0).getType() == ParagraphType.BLANK || paras
 857                                                         .get(0).getType() == ParagraphType.BREAK)) {
 858                                 paras.remove(0);
 859                         }
 860
 861                         // Remove blank/brk at end
 862                         int last = paras.size() - 1;
 863                         if (paras.size() > 0
 864                                         && (paras.get(last).getType() == ParagraphType.BLANK || paras
 865                                                         .get(last).getType() == ParagraphType.BREAK)) {
 866                                 paras.remove(last);
 867                         }
 868
 869                         chap.setParagraphs(paras);
 870
 871                         return chap;
 872                 } finally {
 873                         in.close();
 874                 }
 875         }
 876
 877         /**
 878          * List the image file extensions (dot included) known by this support.
 879          *
 880          * @param emptyAllowed
 881          *            TRUE to also get the empty extension, in front of the others
 882          *
 883          * @return the extensions, in a new array
 884          */
 885         protected String[] getImageExt(boolean emptyAllowed) {
 886                 return emptyAllowed ? new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" }
 887                                 : new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 888         }
 889
 890         /**
 891          * Check if the given resource can be a local image or a remote image, then
 892          * refresh the cache with it if it is.
 893          *
 894          * @param source
 895          *            the story source (local files are looked up in its directory)
 896          * @param line
 897          *            the resource to check (each supported extension is tried on it)
 898          *
 899          * @return the image URL if found, or NULL
 900          */
 901         protected URL getImage(URL source, String line) {
 902                 String path = new File(source.getFile()).getParent();
 903                 URL url = null;
 904
 905                 // try for files: File needs a plain path here, it does not understand
 906                 // the "file:/..." form of an URL (such a lookup practically never matches)
 907                 try {
 908                         String basePath = new File(new File(path), line.trim())
 909                                         .getAbsolutePath();
 910                         for (String ext : getImageExt(true)) {
 911                                 if (new File(basePath + ext).isFile()) {
 912                                         url = new File(basePath + ext).toURI().toURL();
 913                                 }
 914                         }
 915                 } catch (Exception e) {
 916                         // Best effort (also covers a source without parent directory)
 917                 }
 918
 919                 if (url == null) {
 920                         // try for URLs
 921                         try {
 922                                 for (String ext : getImageExt(true)) {
 923                                         if (Instance.getCache().check(new URL(line + ext))) {
 924                                                 url = new URL(line + ext);
 925                                         }
 926                                 }
 927
 928                                 // try out of cache
 929                                 if (url == null) {
 930                                         for (String ext : getImageExt(true)) {
 931                                                 try {
 932                                                         url = new URL(line + ext);
 933                                                         Instance.getCache().refresh(url, this, true);
 934                                                         break;
 935                                                 } catch (IOException e) {
 936                                                         // no image with this ext
 937                                                         url = null;
 938                                                 }
 939                                         }
 940                                 }
 941                         } catch (MalformedURLException e) {
 942                                 // Not an url
 943                         }
 944                 }
 945
 946                 // refresh the cached file (note: also done for an URL just fetched above)
 947                 if (url != null) {
 948                         try {
 949                                 Instance.getCache().refresh(url, this, true);
 950                         } catch (IOException e) {
 951                                 // woops, broken image
 952                                 url = null;
 953                         }
 954                 }
 955
 956                 return url;
 957         }
 958
 959         /**
 960          * Give access to {@link BasicSupport#in}, after having reset it.
 961          *
 962          * @return {@link BasicSupport#in}, once reset
 963          * @throws IOException
 964          *             if the reset fails
 965          */
 966         protected InputStream getInput() throws IOException {
 967                 InputStream input = in;
 968                 input.reset();
 969                 return input;
 970         }
 971
 972         /**
 973          * Fix the author name if it is prefixed with some "by" {@link String}.
 974          *
 975          * @param author
 976          *            the author with a possible prefix (can be NULL)
 977          *
 978          * @return the author without prefixes (NULL if the author was NULL)
 979          */
 980         private String fixAuthor(String author) {
 981                 if (author != null) {
 982                         String[] bys = Instance.getConfig().getString(Config.BYS).split(",");
 983                         for (String suffix : new String[] { " ", ":" }) {
 984                                 for (String by : bys) {
 985                                         String prefix = by + suffix; // case ignored char by char: no length change
 986                                         if (author.regionMatches(true, 0, prefix, 0, prefix.length())) {
 987                                                 author = author.substring(prefix.length()).trim();
 988                                         }
 989                                 }
 990                         }
 991
 992                         // Special case (without suffix):
 993                         if (author.startsWith("©")) {
 994                                 author = author.substring(1).trim();
 995                         }
 996                 }
 997
 998                 return author;
 999         }
1000
1001         /**
1002          * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
1003          * and requotify them (i.e., separate them into QUOTE paragraphs and other
1004          * paragraphs (quotes or not)).
1005          *
1006          * @param para
1007          *            the paragraph to requotify (not necessarily a quote)
1008          *
1009          * @return the correctly (or so we hope) quotified paragraphs
1010          */
1011         private List<Paragraph> requotify(Paragraph para) {
1012                 List<Paragraph> newParas = new ArrayList<Paragraph>();
1013
1014                 if (para.getType() == ParagraphType.QUOTE) {
1015                         String line = para.getContent();
1016                         boolean singleQ = line.startsWith("" + openQuote);
1017                         boolean doubleQ = line.startsWith("" + openDoubleQuote);
1018                         if (!singleQ && !doubleQ) {
1019                                 line = openDoubleQuote + line + closeDoubleQuote;
1020                                 newParas.add(new Paragraph(ParagraphType.QUOTE, line));
1021                         } else {
1022                                 char close = singleQ ? closeQuote : closeDoubleQuote;
1023                                 int posClose = line.indexOf(close);
1024                                 int posDot = line.indexOf(".");
1025                                 while (posDot >= 0 && posDot < posClose) { // first dot after the close
1026                                         posDot = line.indexOf(".", posDot + 1);
1027                                 }
1028                                 if (posDot >= 0) {
1029                                         String rest = line.substring(posDot + 1).trim();
1030                                         line = line.substring(0, posDot + 1).trim();
1031                                         newParas.add(new Paragraph(ParagraphType.QUOTE, line));
1032                                         if (!rest.isEmpty()) { // nothing left: do not add an empty paragraph
1033                                                 newParas.addAll(requotify(processPara(rest)));
1034                                         }
1035                                 } else {
1036                                         newParas.add(para);
1037                                 }
1038                         }
1039                 } else {
1040                         newParas.add(para);
1041                 }
1042
1043                 return newParas;
1044         }
1045
1046         /**
1047          * Process a {@link Paragraph} from a raw line of text.
1048          * <p>
1049          * Will also fix quotes, dashes and HTML encoding if needed.
1050          *
1051          * @param line
1052          *            the raw line
1053          *
1054          * @return the processed {@link Paragraph}
1055          */
1056         private Paragraph processPara(String line) {
1057                 line = ifUnhtml(line).trim();
1058
1059                 boolean space = true; // still TRUE at the end: BLANK paragraph
1060                 boolean brk = true; // still TRUE at the end (and not space): BREAK paragraph
1061                 boolean quote = false; // TRUE at the end (and not brk): QUOTE paragraph
1062                 boolean tentativeCloseQuote = false; // a ' waits for the next char to be decided
1063                 char prev = '\0';
1064                 int dashCount = 0; // number of '-' not written yet
1065
1066                 StringBuilder builder = new StringBuilder();
1067                 for (char car : line.toCharArray()) {
1068                         if (car != '-') {
1069                                 if (dashCount > 0) {
1070                                         // dash, ndash and mdash: - – —
1071                                         // currently: always use mdash
1072                                         builder.append(dashCount == 1 ? '-' : '—');
1073                                 }
1074                                 dashCount = 0;
1075                         }
1076
1077                         if (tentativeCloseQuote) {
1078                                 tentativeCloseQuote = false;
1079                                 if ((car >= 'a' && car <= 'z') || (car >= 'A' && car <= 'Z')
1080                                                 || (car >= '0' && car <= '9')) {
1081                                         builder.append("'");
1082                                 } else {
1083                                         builder.append(closeQuote);
1084                                 }
1085                         }
1086
1087                         switch (car) {
1088                         case '\u00A0': // note: unbreakable space
1089                         case ' ':
1090                         case '\t':
1091                         case '\n': // just in case
1092                         case '\r': // just in case
1093                                 builder.append(' ');
1094                                 break;
1095
1096                         case '\'':
1097                                 if (space || (brk && quote)) {
1098                                         quote = true;
1099                                         builder.append(openQuote);
1100                                 } else if (prev == ' ') {
1101                                         builder.append(openQuote);
1102                                 } else {
1103                                         // it is a quote ("I'm off") or a 'quote' ("This
1104                                         // 'good' restaurant"...)
1105                                         tentativeCloseQuote = true;
1106                                 }
1107                                 break;
1108
1109                         case '"':
1110                                 if (space || (brk && quote)) {
1111                                         quote = true;
1112                                         builder.append(openDoubleQuote);
1113                                 } else if (prev == ' ') {
1114                                         builder.append(openDoubleQuote);
1115                                 } else {
1116                                         builder.append(closeDoubleQuote);
1117                                 }
1118                                 break;
1119
1120                         case '-':
1121                                 if (space) {
1122                                         quote = true;
1123                                 } else {
1124                                         dashCount++;
1125                                 }
1126                                 space = false;
1127                                 break;
1128
1129                         case '*':
1130                         case '~':
1131                         case '/':
1132                         case '\\':
1133                         case '<':
1134                         case '>':
1135                         case '=':
1136                         case '+':
1137                         case '_':
1138                         case '–':
1139                         case '—':
1140                                 space = false;
1141                                 builder.append(car);
1142                                 break;
1143
1144                         case '‘':
1145                         case '`':
1146                         case '‹':
1147                         case '﹁':
1148                         case '〈':
1149                         case '「':
1150                                 if (space || (brk && quote)) {
1151                                         quote = true;
1152                                 }
1153                                 builder.append(openQuote);
1154                                 space = false;
1155                                 brk = false;
1156                                 break;
1157
1158                         case '’':
1159                         case '›':
1160                         case '﹂':
1161                         case '〉':
1162                         case '」':
1163                                 space = false;
1164                                 brk = false;
1165                                 builder.append(closeQuote);
1166                                 break;
1167
1168                         case '«':
1169                         case '“':
1170                         case '﹃':
1171                         case '《':
1172                         case '『':
1173                                 if (space || (brk && quote)) {
1174                                         quote = true;
1175                                 }
1176                                 builder.append(openDoubleQuote);
1177                                 space = false;
1178                                 brk = false;
1179                                 break;
1180
1181                         case '»':
1182                         case '”':
1183                         case '﹄':
1184                         case '》':
1185                         case '』':
1186                                 space = false;
1187                                 brk = false;
1188                                 builder.append(closeDoubleQuote);
1189                                 break;
1190
1191                         default:
1192                                 space = false;
1193                                 brk = false;
1194                                 builder.append(car);
1195                                 break;
1196                         }
1197
1198                         prev = car;
1199                 }
1200
1201                 // Dashes ending a text line were lost (separator lines are kept as before)
1202                 if (dashCount > 0 && !brk) {
1203                         builder.append(dashCount == 1 ? '-' : '—');
1204                 }
1205
1206                 if (tentativeCloseQuote) {
1207                         builder.append(closeQuote);
1208                 }
1209
1210                 line = builder.toString().trim();
1211
1212                 ParagraphType type = ParagraphType.NORMAL;
1213                 if (space) {
1214                         type = ParagraphType.BLANK;
1215                 } else if (brk) {
1216                         type = ParagraphType.BREAK;
1217                 } else if (quote) {
1218                         type = ParagraphType.QUOTE;
1219                 }
1220
1221                 return new Paragraph(type, line);
1222         }
1223
1224         /**
1225          * Strip the HTML out of the input, but <b>only</b> when
1226          * {@link BasicSupport#isHtml()} is true.
1227          *
1228          * @param input
1229          *            the text to convert (can be NULL)
1230          *
1231          * @return the converted text, or the input itself when nothing applies
1232          */
1233         private String ifUnhtml(String input) {
1234                 if (!isHtml() || input == null) {
1235                         return input;
1236                 }
1237
1238                 return StringUtils.unhtml(input);
1239         }
1240
1241         /**
1242          * Look for the {@link BasicSupport} implementation able to process the
1243          * given resource (TEXT and INFO_TEXT are tried last, in that order).
1244          *
1245          * @param url
1246          *            the story resource (can be NULL)
1247          *
1248          * @return the first implementation that supports it, or NULL
1249          */
1250         public static BasicSupport getSupport(URL url) {
1251                 if (url == null) {
1252                         return null;
1253                 }
1254
1255                 // TEXT and INFO_TEXT always support files (not URLs though): try them last
1256                 List<SupportType> order = new ArrayList<SupportType>();
1257                 for (SupportType type : SupportType.values()) {
1258                         if (type == SupportType.TEXT || type == SupportType.INFO_TEXT) {
1259                                 continue;
1260                         }
1261                         order.add(type);
1262                 }
1263                 order.add(SupportType.TEXT);
1264                 order.add(SupportType.INFO_TEXT);
1265
1266                 for (SupportType type : order) {
1267                         BasicSupport candidate = getSupport(type);
1268                         if (candidate != null && candidate.supports(url)) {
1269                                 return candidate;
1270                         }
1271                 }
1272
1273                 return null;
1274         }
1275
1276         /**
1277          * Create the {@link BasicSupport} implementation dedicated to the given type.
1278          *
1279          * @param type
1280          *            the type (must not be NULL)
1281          *
1282          * @return the implementation for this type, or NULL if there is none
1283          */
1284         public static BasicSupport getSupport(SupportType type) {
1285                 switch (type) {
1286                 case CBZ:
1287                         return new Cbz().setType(type);
1288                 case E621:
1289                         return new E621().setType(type);
1290                 case EPUB:
1291                         return new Epub().setType(type);
1292                 case FANFICTION:
1293                         return new Fanfiction().setType(type);
1294                 case FIMFICTION:
1295                         return new Fimfiction().setType(type);
1296                 case INFO_TEXT:
1297                         return new InfoText().setType(type);
1298                 case MANGAFOX:
1299                         return new MangaFox().setType(type);
1300                 case TEXT:
1301                         return new Text().setType(type);
1302                 default:
1303                         return null;
1304                 }
1305         }
1306 }