fanfix/supported/BasicSupport.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.awt.image.BufferedImage;
   4 import java.io.ByteArrayInputStream;
   5 import java.io.File;
   6 import java.io.IOException;
   7 import java.io.InputStream;
   8 import java.net.MalformedURLException;
   9 import java.net.URL;
  10 import java.util.ArrayList;
  11 import java.util.HashMap;
  12 import java.util.List;
  13 import java.util.Map;
  14 import java.util.Map.Entry;
  15 import java.util.Scanner;
  16
  17 import be.nikiroo.fanfix.Instance;
  18 import be.nikiroo.fanfix.bundles.Config;
  19 import be.nikiroo.fanfix.bundles.StringId;
  20 import be.nikiroo.fanfix.data.Chapter;
  21 import be.nikiroo.fanfix.data.MetaData;
  22 import be.nikiroo.fanfix.data.Paragraph;
  23 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
  24 import be.nikiroo.fanfix.data.Story;
  25 import be.nikiroo.utils.IOUtils;
  26 import be.nikiroo.utils.StringUtils;
  27
  28 /**
  29  * This class is the base class used by the other support classes. It can be
  30  * used outside of this package, and has static methods that you can use to get
  31  * access to the correct support class.
  32  * <p>
  33  * It will be used with 'resources' (usually web pages or files).
  34  *
  35  * @author niki
  36  */
  37 public abstract class BasicSupport {
  38         /**
  39          * The supported input types for which we can get a {@link BasicSupport}
  40          * object.
  41          *
  42          * @author niki
  43          */
  44         public enum SupportType {
  45                 /** EPUB files created with this program */
  46                 EPUB,
  47                 /** Pure text file with some rules */
  48                 TEXT,
  49                 /** TEXT but with associated .info file */
  50                 INFO_TEXT,
  51                 /** My Little Pony fanfictions */
  52                 FIMFICTION,
  53                 /** Fanfictions from a lot of different universes */
  54                 FANFICTION,
  55                 /** Website with lots of Mangas */
  56                 MANGAFOX,
  57                 /** Furry website with comics support */
  58                 E621,
  59                 /** CBZ files */
  60                 CBZ;
  61
  62                 /**
  63                  * A description of this support type (more information than the
  64                  * {@link BasicSupport#getSourceName()}).
  65                  *
  66                  * @return the description
  67                  */
  68                 public String getDesc() {
  69                         String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
  70                                         this.name());
  71
  72                         if (desc == null) {
  73                                 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
  74                         }
  75
  76                         return desc;
  77                 }
  78
  79                 /**
  80                  * The name of this support type (a short version).
  81                  *
  82                  * @return the name
  83                  */
  84                 public String getSourceName() {
  85                         BasicSupport support = BasicSupport.getSupport(this);
  86                         if (support != null) {
  87                                 return support.getSourceName();
  88                         }
  89
  90                         return null;
  91                 }
  92
  93                 @Override
  94                 public String toString() {
  95                         return super.toString().toLowerCase();
  96                 }
  97
  98                 /**
  99                  * Call {@link SupportType#valueOf(String.toUpperCase())}.
 100                  *
 101                  * @param typeName
 102                  *            the possible type name
 103                  *
 104                  * @return NULL or the type
 105                  */
 106                 public static SupportType valueOfUC(String typeName) {
 107                         return SupportType.valueOf(typeName == null ? null : typeName
 108                                         .toUpperCase());
 109                 }
 110
 111                 /**
 112                  * Call {@link SupportType#valueOf(String.toUpperCase())} but return
 113                  * NULL for NULL instead of raising exception.
 114                  *
 115                  * @param typeName
 116                  *            the possible type name
 117                  *
 118                  * @return NULL or the type
 119                  */
 120                 public static SupportType valueOfNullOkUC(String typeName) {
 121                         if (typeName == null) {
 122                                 return null;
 123                         }
 124
 125                         return SupportType.valueOfUC(typeName);
 126                 }
 127
 128                 /**
 129                  * Call {@link SupportType#valueOf(String.toUpperCase())} but return
 130                  * NULL in case of error instead of raising an exception.
 131                  *
 132                  * @param typeName
 133                  *            the possible type name
 134                  *
 135                  * @return NULL or the type
 136                  */
 137                 public static SupportType valueOfAllOkUC(String typeName) {
 138                         try {
 139                                 return SupportType.valueOfUC(typeName);
 140                         } catch (Exception e) {
 141                                 return null;
 142                         }
 143                 }
 144         }
 145
 146         private InputStream in;
 147         private SupportType type;
 148         private URL currentReferer; // with on 'r', as in 'HTTP'...
 149
 150         // quote chars
 151         private char openQuote = Instance.getTrans().getChar(
 152                         StringId.OPEN_SINGLE_QUOTE);
 153         private char closeQuote = Instance.getTrans().getChar(
 154                         StringId.CLOSE_SINGLE_QUOTE);
 155         private char openDoubleQuote = Instance.getTrans().getChar(
 156                         StringId.OPEN_DOUBLE_QUOTE);
 157         private char closeDoubleQuote = Instance.getTrans().getChar(
 158                         StringId.CLOSE_DOUBLE_QUOTE);
 159
 160         /**
 161          * The name of this support class.
 162          *
 163          * @return the name
 164          */
 165         protected abstract String getSourceName();
 166
 167         /**
 168          * Check if the given resource is supported by this {@link BasicSupport}.
 169          *
 170          * @param url
 171          *            the resource to check for
 172          *
 173          * @return TRUE if it is
 174          */
 175         protected abstract boolean supports(URL url);
 176
 177         /**
 178          * Return TRUE if the support will return HTML encoded content values for
 179          * the chapters content.
 180          *
 181          * @return TRUE for HTML
 182          */
 183         protected abstract boolean isHtml();
 184
 185         protected abstract MetaData getMeta(URL source, InputStream in)
 186                         throws IOException;
 187
 188         /**
 189          * Return the story description.
 190          *
 191          * @param source
 192          *            the source of the story
 193          * @param in
 194          *            the input (the main resource)
 195          *
 196          * @return the description
 197          *
 198          * @throws IOException
 199          *             in case of I/O error
 200          */
 201         protected abstract String getDesc(URL source, InputStream in)
 202                         throws IOException;
 203
 204         /**
 205          * Return the list of chapters (name and resource).
 206          *
 207          * @param source
 208          *            the source of the story
 209          * @param in
 210          *            the input (the main resource)
 211          *
 212          * @return the chapters
 213          *
 214          * @throws IOException
 215          *             in case of I/O error
 216          */
 217         protected abstract List<Entry<String, URL>> getChapters(URL source,
 218                         InputStream in) throws IOException;
 219
 220         /**
 221          * Return the content of the chapter (possibly HTML encoded, if
 222          * {@link BasicSupport#isHtml()} is TRUE).
 223          *
 224          * @param source
 225          *            the source of the story
 226          * @param in
 227          *            the input (the main resource)
 228          * @param number
 229          *            the chapter number
 230          *
 231          * @return the content
 232          *
 233          * @throws IOException
 234          *             in case of I/O error
 235          */
 236         protected abstract String getChapterContent(URL source, InputStream in,
 237                         int number) throws IOException;
 238
 239         /**
 240          * Return the list of cookies (values included) that must be used to
 241          * correctly fetch the resources.
 242          * <p>
 243          * You are expected to call the super method implementation if you override
 244          * it.
 245          *
 246          * @return the cookies
 247          */
 248         public Map<String, String> getCookies() {
 249                 return new HashMap<String, String>();
 250         }
 251
 252         /**
 253          * Process the given story resource into a partially filled {@link Story}
 254          * object containing the name and metadata, except for the description.
 255          *
 256          * @param url
 257          *            the story resource
 258          *
 259          * @return the {@link Story}
 260          *
 261          * @throws IOException
 262          *             in case of I/O error
 263          */
 264         public Story processMeta(URL url) throws IOException {
 265                 return processMeta(url, true, false);
 266         }
 267
 268         /**
 269          * Process the given story resource into a partially filled {@link Story}
 270          * object containing the name and metadata.
 271          *
 272          * @param url
 273          *            the story resource
 274          *
 275          * @param close
 276          *            close "this" and "in" when done
 277          *
 278          * @return the {@link Story}
 279          *
 280          * @throws IOException
 281          *             in case of I/O error
 282          */
 283         protected Story processMeta(URL url, boolean close, boolean getDesc)
 284                         throws IOException {
 285                 in = Instance.getCache().open(url, this, false);
 286                 if (in == null) {
 287                         return null;
 288                 }
 289
 290                 try {
 291                         preprocess(url, getInput());
 292
 293                         Story story = new Story();
 294                         MetaData meta = getMeta(url, getInput());
 295                         story.setMeta(meta);
 296
 297                         if (meta != null && meta.getCover() == null) {
 298                                 meta.setCover(getDefaultCover(meta.getSubject()));
 299                         }
 300
 301                         if (getDesc) {
 302                                 String descChapterName = Instance.getTrans().getString(
 303                                                 StringId.DESCRIPTION);
 304                                 story.getMeta().setResume(
 305                                                 makeChapter(url, 0, descChapterName,
 306                                                                 getDesc(url, getInput())));
 307                         }
 308
 309                         return story;
 310                 } finally {
 311                         if (close) {
 312                                 try {
 313                                         close();
 314                                 } catch (IOException e) {
 315                                         Instance.syserr(e);
 316                                 }
 317
 318                                 if (in != null) {
 319                                         in.close();
 320                                 }
 321                         }
 322                 }
 323         }
 324
 325         /**
 326          * Process the given story resource into a fully filled {@link Story}
 327          * object.
 328          *
 329          * @param url
 330          *            the story resource
 331          *
 332          * @return the {@link Story}
 333          *
 334          * @throws IOException
 335          *             in case of I/O error
 336          */
 337         public Story process(URL url) throws IOException {
 338                 setCurrentReferer(url);
 339
 340                 try {
 341                         Story story = processMeta(url, false, true);
 342                         if (story == null) {
 343                                 return null;
 344                         }
 345
 346                         story.setChapters(new ArrayList<Chapter>());
 347
 348                         List<Entry<String, URL>> chapters = getChapters(url, getInput());
 349                         int i = 1;
 350                         if (chapters != null) {
 351                                 for (Entry<String, URL> chap : chapters) {
 352                                         setCurrentReferer(chap.getValue());
 353                                         InputStream chapIn = Instance.getCache().open(
 354                                                         chap.getValue(), this, true);
 355                                         try {
 356                                                 story.getChapters().add(
 357                                                                 makeChapter(url, i, chap.getKey(),
 358                                                                                 getChapterContent(url, chapIn, i)));
 359                                         } finally {
 360                                                 chapIn.close();
 361                                         }
 362
 363                                         i++;
 364                                 }
 365                         }
 366
 367                         return story;
 368
 369                 } finally {
 370                         try {
 371                                 close();
 372                         } catch (IOException e) {
 373                                 Instance.syserr(e);
 374                         }
 375
 376                         if (in != null) {
 377                                 in.close();
 378                         }
 379
 380                         currentReferer = null;
 381                 }
 382         }
 383
 384         /**
 385          * The support type.$
 386          *
 387          * @return the type
 388          */
 389         public SupportType getType() {
 390                 return type;
 391         }
 392
 393         /**
 394          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 395          * the current {@link URL} we work on.
 396          *
 397          * @return the referer
 398          */
 399         public URL getCurrentReferer() {
 400                 return currentReferer;
 401         }
 402
 403         /**
 404          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 405          * the current {@link URL} we work on.
 406          *
 407          * @param currentReferer
 408          *            the new referer
 409          */
 410         protected void setCurrentReferer(URL currentReferer) {
 411                 this.currentReferer = currentReferer;
 412         }
 413
 414         /**
 415          * The support type.
 416          *
 417          * @param type
 418          *            the new type
 419          *
 420          * @return this
 421          */
 422         protected BasicSupport setType(SupportType type) {
 423                 this.type = type;
 424                 return this;
 425         }
 426
 427         /**
 428          * Prepare the support if needed before processing.
 429          *
 430          * @param source
 431          *            the source of the story
 432          * @param in
 433          *            the input (the main resource)
 434          *
 435          * @throws IOException
 436          *             on I/O error
 437          */
 438         protected void preprocess(URL source, InputStream in) throws IOException {
 439         }
 440
 441         /**
 442          * Now that we have processed the {@link Story}, close the resources if any.
 443          *
 444          * @throws IOException
 445          *             on I/O error
 446          */
 447         protected void close() throws IOException {
 448         }
 449
 450         /**
 451          * Create a {@link Chapter} object from the given information, formatting
 452          * the content as it should be.
 453          *
 454          * @param number
 455          *            the chapter number
 456          * @param name
 457          *            the chapter name
 458          * @param content
 459          *            the chapter content
 460          *
 461          * @return the {@link Chapter}
 462          *
 463          * @throws IOException
 464          *             in case of I/O error
 465          */
 466         protected Chapter makeChapter(URL source, int number, String name,
 467                         String content) throws IOException {
 468
 469                 // Chapter name: process it correctly, then remove the possible
 470                 // redundant "Chapter x: " in front of it
 471                 String chapterName = processPara(name).getContent().trim();
 472                 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
 473                                 .split(",")) {
 474                         String chapterWord = Instance.getConfig().getStringX(
 475                                         Config.CHAPTER, lang);
 476                         if (chapterName.startsWith(chapterWord)) {
 477                                 chapterName = chapterName.substring(chapterWord.length())
 478                                                 .trim();
 479                                 break;
 480                         }
 481                 }
 482
 483                 if (chapterName.startsWith(Integer.toString(number))) {
 484                         chapterName = chapterName.substring(
 485                                         Integer.toString(number).length()).trim();
 486                 }
 487
 488                 if (chapterName.startsWith(":")) {
 489                         chapterName = chapterName.substring(1).trim();
 490                 }
 491                 //
 492
 493                 Chapter chap = new Chapter(number, chapterName);
 494
 495                 if (content == null) {
 496                         return chap;
 497                 }
 498
 499                 if (isHtml()) {
 500                         // Special <HR> processing:
 501                         content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
 502                                         "\n* * *\n");
 503                 }
 504
 505                 InputStream in = new ByteArrayInputStream(content.getBytes("UTF-8"));
 506                 try {
 507                         @SuppressWarnings("resource")
 508                         Scanner scan = new Scanner(in, "UTF-8");
 509                         scan.useDelimiter("(\\n|</p>)"); // \n for test, </p> for html
 510
 511                         List<Paragraph> paras = new ArrayList<Paragraph>();
 512                         while (scan.hasNext()) {
 513                                 String line = scan.next().trim();
 514                                 boolean image = false;
 515                                 if (line.startsWith("[") && line.endsWith("]")) {
 516                                         URL url = getImageUrl(this, source,
 517                                                         line.substring(1, line.length() - 1).trim());
 518                                         if (url != null) {
 519                                                 paras.add(new Paragraph(url));
 520                                                 image = true;
 521                                         }
 522                                 }
 523
 524                                 if (!image) {
 525                                         paras.add(processPara(line));
 526                                 }
 527                         }
 528
 529                         // Check quotes for "bad" format
 530                         List<Paragraph> newParas = new ArrayList<Paragraph>();
 531                         for (Paragraph para : paras) {
 532                                 newParas.addAll(requotify(para));
 533                         }
 534                         paras = newParas;
 535
 536                         // Remove double blanks/brks
 537                         boolean space = false;
 538                         boolean brk = true;
 539                         for (int i = 0; i < paras.size(); i++) {
 540                                 Paragraph para = paras.get(i);
 541                                 boolean thisSpace = para.getType() == ParagraphType.BLANK;
 542                                 boolean thisBrk = para.getType() == ParagraphType.BREAK;
 543
 544                                 if (space && thisBrk) {
 545                                         paras.remove(i - 1);
 546                                         i--;
 547                                 } else if ((space || brk) && (thisSpace || thisBrk)) {
 548                                         paras.remove(i);
 549                                         i--;
 550                                 }
 551
 552                                 space = thisSpace;
 553                                 brk = thisBrk;
 554                         }
 555
 556                         // Remove blank/brk at start
 557                         if (paras.size() > 0
 558                                         && (paras.get(0).getType() == ParagraphType.BLANK || paras
 559                                                         .get(0).getType() == ParagraphType.BREAK)) {
 560                                 paras.remove(0);
 561                         }
 562
 563                         // Remove blank/brk at end
 564                         int last = paras.size() - 1;
 565                         if (paras.size() > 0
 566                                         && (paras.get(last).getType() == ParagraphType.BLANK || paras
 567                                                         .get(last).getType() == ParagraphType.BREAK)) {
 568                                 paras.remove(last);
 569                         }
 570
 571                         chap.setParagraphs(paras);
 572
 573                         return chap;
 574                 } finally {
 575                         in.close();
 576                 }
 577         }
 578
 579         static BufferedImage getDefaultCover(String subject) {
 580                 if (subject != null && !subject.isEmpty()
 581                                 && Instance.getCoverDir() != null) {
 582                         try {
 583                                 File fileCover = new File(Instance.getCoverDir(), subject);
 584                                 return getImage(null, fileCover.toURI().toURL(), subject);
 585                         } catch (MalformedURLException e) {
 586                         }
 587                 }
 588
 589                 return null;
 590         }
 591
 592         /**
 593          * Return the list of supported image extensions.
 594          *
 595          * @return the extensions
 596          */
 597         static String[] getImageExt(boolean emptyAllowed) {
 598                 if (emptyAllowed) {
 599                         return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 600                 } else {
 601                         return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 602                 }
 603         }
 604
 605         static BufferedImage getImage(BasicSupport support, URL source, String line) {
 606                 URL url = getImageUrl(support, source, line);
 607                 if (url != null) {
 608                         InputStream in = null;
 609                         try {
 610                                 in = Instance.getCache().open(url, getSupport(url), true);
 611                                 return IOUtils.toImage(in);
 612                         } catch (IOException e) {
 613                         } finally {
 614                                 if (in != null) {
 615                                         try {
 616                                                 in.close();
 617                                         } catch (IOException e) {
 618                                         }
 619                                 }
 620                         }
 621                 }
 622
 623                 return null;
 624         }
 625
 626         /**
 627          * Check if the given resource can be a local image or a remote image, then
 628          * refresh the cache with it if it is.
 629          *
 630          * @param source
 631          *            the story source
 632          * @param line
 633          *            the resource to check
 634          *
 635          * @return the image URL if found, or NULL
 636          *
 637          */
 638         static URL getImageUrl(BasicSupport support, URL source, String line) {
 639                 URL url = null;
 640
 641                 if (line != null) {
 642                         // try for files
 643                         String path = null;
 644                         if (source != null) {
 645                                 path = new File(source.getFile()).getParent();
 646                                 try {
 647                                         String basePath = new File(new File(path), line.trim())
 648                                                         .getAbsolutePath();
 649                                         for (String ext : getImageExt(true)) {
 650                                                 if (new File(basePath + ext).exists()) {
 651                                                         url = new File(basePath + ext).toURI().toURL();
 652                                                 }
 653                                         }
 654                                 } catch (Exception e) {
 655                                         // Nothing to do here
 656                                 }
 657                         }
 658
 659                         if (url == null) {
 660                                 // try for URLs
 661                                 try {
 662                                         for (String ext : getImageExt(true)) {
 663                                                 if (Instance.getCache().check(new URL(line + ext))) {
 664                                                         url = new URL(line + ext);
 665                                                         break;
 666                                                 }
 667                                         }
 668
 669                                         // try out of cache
 670                                         if (url == null) {
 671                                                 for (String ext : getImageExt(true)) {
 672                                                         try {
 673                                                                 url = new URL(line + ext);
 674                                                                 Instance.getCache().refresh(url, support, true);
 675                                                                 break;
 676                                                         } catch (IOException e) {
 677                                                                 // no image with this ext
 678                                                                 url = null;
 679                                                         }
 680                                                 }
 681                                         }
 682                                 } catch (MalformedURLException e) {
 683                                         // Not an url
 684                                 }
 685                         }
 686
 687                         // refresh the cached file
 688                         if (url != null) {
 689                                 try {
 690                                         Instance.getCache().refresh(url, support, true);
 691                                 } catch (IOException e) {
 692                                         // woops, broken image
 693                                         url = null;
 694                                 }
 695                         }
 696                 }
 697
 698                 return url;
 699         }
 700
 701         protected InputStream reset(InputStream in) {
 702                 try {
 703                         in.reset();
 704                 } catch (IOException e) {
 705                 }
 706                 return in;
 707         }
 708
 709         /**
 710          * Reset then return {@link BasicSupport#in}.
 711          *
 712          * @return {@link BasicSupport#in}
 713          */
 714         protected InputStream getInput() {
 715                 return reset(in);
 716         }
 717
 718         /**
 719          * Fix the author name if it is prefixed with some "by" {@link String}.
 720          *
 721          * @param author
 722          *            the author with a possible prefix
 723          *
 724          * @return the author without prefixes
 725          */
 726         protected String fixAuthor(String author) {
 727                 if (author != null) {
 728                         for (String suffix : new String[] { " ", ":" }) {
 729                                 for (String byString : Instance.getConfig()
 730                                                 .getString(Config.BYS).split(",")) {
 731                                         byString += suffix;
 732                                         if (author.toUpperCase().startsWith(byString.toUpperCase())) {
 733                                                 author = author.substring(byString.length()).trim();
 734                                         }
 735                                 }
 736                         }
 737
 738                         // Special case (without suffix):
 739                         if (author.startsWith("©")) {
 740                                 author = author.substring(1);
 741                         }
 742                 }
 743
 744                 return author;
 745         }
 746
 747         /**
 748          * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
 749          * and requotify them (i.e., separate them into QUOTE paragraphs and other
 750          * paragraphs (quotes or not)).
 751          *
 752          * @param para
 753          *            the paragraph to requotify (not necessaraly a quote)
 754          *
 755          * @return the correctly (or so we hope) quotified paragraphs
 756          */
 757         private List<Paragraph> requotify(Paragraph para) {
 758                 List<Paragraph> newParas = new ArrayList<Paragraph>();
 759
 760                 if (para.getType() == ParagraphType.QUOTE
 761                                 && para.getContent().length() > 2) {
 762                         String line = para.getContent();
 763                         boolean singleQ = line.startsWith("" + openQuote);
 764                         boolean doubleQ = line.startsWith("" + openDoubleQuote);
 765
 766                         if (!singleQ && !doubleQ) {
 767                                 line = openDoubleQuote + line + closeDoubleQuote;
 768                                 newParas.add(new Paragraph(ParagraphType.QUOTE, line));
 769                         } else {
 770                                 char open = singleQ ? openQuote : openDoubleQuote;
 771                                 char close = singleQ ? closeQuote : closeDoubleQuote;
 772
 773                                 int posDot = -1;
 774                                 boolean inQuote = false;
 775                                 int i = 0;
 776                                 for (char car : line.toCharArray()) {
 777                                         if (car == open) {
 778                                                 inQuote = true;
 779                                         } else if (car == close) {
 780                                                 inQuote = false;
 781                                         } else if (car == '.' && !inQuote) {
 782                                                 posDot = i;
 783                                                 break;
 784                                         }
 785                                         i++;
 786                                 }
 787
 788                                 if (posDot >= 0) {
 789                                         String rest = line.substring(posDot + 1).trim();
 790                                         line = line.substring(0, posDot + 1).trim();
 791                                         newParas.add(new Paragraph(ParagraphType.QUOTE, line));
 792                                         if (!rest.isEmpty()) {
 793                                                 newParas.addAll(requotify(processPara(rest)));
 794                                         }
 795                                 } else {
 796                                         newParas.add(para);
 797                                 }
 798                         }
 799                 } else {
 800                         newParas.add(para);
 801                 }
 802
 803                 return newParas;
 804         }
 805
 806         /**
 807          * Process a {@link Paragraph} from a raw line of text.
 808          * <p>
 809          * Will also fix quotes and HTML encoding if needed.
 810          *
 811          * @param line
 812          *            the raw line
 813          *
 814          * @return the processed {@link Paragraph}
 815          */
 816         private Paragraph processPara(String line) {
 817                 line = ifUnhtml(line).trim();
 818
 819                 boolean space = true;
 820                 boolean brk = true;
 821                 boolean quote = false;
 822                 boolean tentativeCloseQuote = false;
 823                 char prev = '\0';
 824                 int dashCount = 0;
 825
 826                 StringBuilder builder = new StringBuilder();
 827                 for (char car : line.toCharArray()) {
 828                         if (car != '-') {
 829                                 if (dashCount > 0) {
 830                                         // dash, ndash and mdash: - – —
 831                                         // currently: always use mdash
 832                                         builder.append(dashCount == 1 ? '-' : '—');
 833                                 }
 834                                 dashCount = 0;
 835                         }
 836
 837                         if (tentativeCloseQuote) {
 838                                 tentativeCloseQuote = false;
 839                                 if ((car >= 'a' && car <= 'z') || (car >= 'A' && car <= 'Z')
 840                                                 || (car >= '0' && car <= '9')) {
 841                                         builder.append("'");
 842                                 } else {
 843                                         builder.append(closeQuote);
 844                                 }
 845                         }
 846
 847                         switch (car) {
 848                         case ' ': // note: unbreakable space
 849                         case ' ':
 850                         case '\t':
 851                         case '\n': // just in case
 852                         case '\r': // just in case
 853                                 builder.append(' ');
 854                                 break;
 855
 856                         case '\'':
 857                                 if (space || (brk && quote)) {
 858                                         quote = true;
 859                                         builder.append(openQuote);
 860                                 } else if (prev == ' ') {
 861                                         builder.append(openQuote);
 862                                 } else {
 863                                         // it is a quote ("I'm off") or a 'quote' ("This
 864                                         // 'good' restaurant"...)
 865                                         tentativeCloseQuote = true;
 866                                 }
 867                                 break;
 868
 869                         case '"':
 870                                 if (space || (brk && quote)) {
 871                                         quote = true;
 872                                         builder.append(openDoubleQuote);
 873                                 } else if (prev == ' ') {
 874                                         builder.append(openDoubleQuote);
 875                                 } else {
 876                                         builder.append(closeDoubleQuote);
 877                                 }
 878                                 break;
 879
 880                         case '-':
 881                                 if (space) {
 882                                         quote = true;
 883                                 } else {
 884                                         dashCount++;
 885                                 }
 886                                 space = false;
 887                                 break;
 888
 889                         case '*':
 890                         case '~':
 891                         case '/':
 892                         case '\\':
 893                         case '<':
 894                         case '>':
 895                         case '=':
 896                         case '+':
 897                         case '_':
 898                         case '–':
 899                         case '—':
 900                                 space = false;
 901                                 builder.append(car);
 902                                 break;
 903
 904                         case '‘':
 905                         case '`':
 906                         case '‹':
 907                         case '﹁':
 908                         case '〈':
 909                         case '「':
 910                                 if (space || (brk && quote)) {
 911                                         quote = true;
 912                                         builder.append(openQuote);
 913                                 } else {
 914                                         builder.append(openQuote);
 915                                 }
 916                                 space = false;
 917                                 brk = false;
 918                                 break;
 919
 920                         case '’':
 921                         case '›':
 922                         case '﹂':
 923                         case '〉':
 924                         case '」':
 925                                 space = false;
 926                                 brk = false;
 927                                 builder.append(closeQuote);
 928                                 break;
 929
 930                         case '«':
 931                         case '“':
 932                         case '﹃':
 933                         case '《':
 934                         case '『':
 935                                 if (space || (brk && quote)) {
 936                                         quote = true;
 937                                         builder.append(openDoubleQuote);
 938                                 } else {
 939                                         builder.append(openDoubleQuote);
 940                                 }
 941                                 space = false;
 942                                 brk = false;
 943                                 break;
 944
 945                         case '»':
 946                         case '”':
 947                         case '﹄':
 948                         case '》':
 949                         case '』':
 950                                 space = false;
 951                                 brk = false;
 952                                 builder.append(closeDoubleQuote);
 953                                 break;
 954
 955                         default:
 956                                 space = false;
 957                                 brk = false;
 958                                 builder.append(car);
 959                                 break;
 960                         }
 961
 962                         prev = car;
 963                 }
 964
 965                 if (tentativeCloseQuote) {
 966                         tentativeCloseQuote = false;
 967                         builder.append(closeQuote);
 968                 }
 969
 970                 line = builder.toString().trim();
 971
 972                 ParagraphType type = ParagraphType.NORMAL;
 973                 if (space) {
 974                         type = ParagraphType.BLANK;
 975                 } else if (brk) {
 976                         type = ParagraphType.BREAK;
 977                 } else if (quote) {
 978                         type = ParagraphType.QUOTE;
 979                 }
 980
 981                 return new Paragraph(type, line);
 982         }
 983
 984         /**
 985          * Remove the HTML from the inpit <b>if</b> {@link BasicSupport#isHtml()} is
 986          * true.
 987          *
 988          * @param input
 989          *            the input
 990          *
 991          * @return the no html version if needed
 992          */
 993         private String ifUnhtml(String input) {
 994                 if (isHtml() && input != null) {
 995                         return StringUtils.unhtml(input);
 996                 }
 997
 998                 return input;
 999         }
1000
1001         /**
1002          * Return a {@link BasicSupport} implementation supporting the given
1003          * resource if possible.
1004          *
1005          * @param url
1006          *            the story resource
1007          *
1008          * @return an implementation that supports it, or NULL
1009          */
1010         public static BasicSupport getSupport(URL url) {
1011                 if (url == null) {
1012                         return null;
1013                 }
1014
1015                 // TEXT and INFO_TEXT always support files (not URLs though)
1016                 for (SupportType type : SupportType.values()) {
1017                         if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1018                                 BasicSupport support = getSupport(type);
1019                                 if (support != null && support.supports(url)) {
1020                                         return support;
1021                                 }
1022                         }
1023                 }
1024
1025                 for (SupportType type : new SupportType[] { SupportType.TEXT,
1026                                 SupportType.INFO_TEXT }) {
1027                         BasicSupport support = getSupport(type);
1028                         if (support != null && support.supports(url)) {
1029                                 return support;
1030                         }
1031                 }
1032
1033                 return null;
1034         }
1035
1036         /**
1037          * Return a {@link BasicSupport} implementation supporting the given type.
1038          *
1039          * @param type
1040          *            the type
1041          *
1042          * @return an implementation that supports it, or NULL
1043          */
1044         public static BasicSupport getSupport(SupportType type) {
1045                 switch (type) {
1046                 case EPUB:
1047                         return new Epub().setType(type);
1048                 case INFO_TEXT:
1049                         return new InfoText().setType(type);
1050                 case FIMFICTION:
1051                         return new Fimfiction().setType(type);
1052                 case FANFICTION:
1053                         return new Fanfiction().setType(type);
1054                 case TEXT:
1055                         return new Text().setType(type);
1056                 case MANGAFOX:
1057                         return new MangaFox().setType(type);
1058                 case E621:
1059                         return new E621().setType(type);
1060                 case CBZ:
1061                         return new Cbz().setType(type);
1062                 }
1063
1064                 return null;
1065         }
1066
1067         /**
1068          * Return the first line from the given input which correspond to the given
1069          * selectors.
1070          *
1071          * @param in
1072          *            the input
1073          * @param needle
1074          *            a string that must be found inside the target line (also
1075          *            supports "^" at start to say "only if it starts with" the
1076          *            needle)
1077          * @param relativeLine
1078          *            the line to return based upon the target line position (-1 =
1079          *            the line before, 0 = the target line...)
1080          *
1081          * @return the line
1082          */
1083         static String getLine(InputStream in, String needle, int relativeLine) {
1084                 return getLine(in, needle, relativeLine, true);
1085         }
1086
1087         /**
1088          * Return a line from the given input which correspond to the given
1089          * selectors.
1090          *
1091          * @param in
1092          *            the input
1093          * @param needle
1094          *            a string that must be found inside the target line (also
1095          *            supports "^" at start to say "only if it starts with" the
1096          *            needle)
1097          * @param relativeLine
1098          *            the line to return based upon the target line position (-1 =
1099          *            the line before, 0 = the target line...)
1100          * @param first
1101          *            takes the first result (as opposed to the last one, which will
1102          *            also always spend the input)
1103          *
1104          * @return the line
1105          */
1106         static String getLine(InputStream in, String needle, int relativeLine,
1107                         boolean first) {
1108                 String rep = null;
1109
1110                 try {
1111                         in.reset();
1112                 } catch (IOException e) {
1113                         Instance.syserr(e);
1114                 }
1115
1116                 List<String> lines = new ArrayList<String>();
1117                 @SuppressWarnings("resource")
1118                 Scanner scan = new Scanner(in, "UTF-8");
1119                 int index = -1;
1120                 scan.useDelimiter("\\n");
1121                 while (scan.hasNext()) {
1122                         lines.add(scan.next());
1123
1124                         if (index == -1) {
1125                                 if (needle.startsWith("^")) {
1126                                         if (lines.get(lines.size() - 1).startsWith(
1127                                                         needle.substring(1))) {
1128                                                 index = lines.size() - 1;
1129                                         }
1130
1131                                 } else {
1132                                         if (lines.get(lines.size() - 1).contains(needle)) {
1133                                                 index = lines.size() - 1;
1134                                         }
1135                                 }
1136                         }
1137
1138                         if (index >= 0 && index + relativeLine < lines.size()) {
1139                                 rep = lines.get(index + relativeLine);
1140                                 if (first) {
1141                                         break;
1142                                 }
1143                         }
1144                 }
1145
1146                 return rep;
1147         }
1148 }