BasicSupport.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.awt.image.BufferedImage;
   4 import java.io.ByteArrayInputStream;
   5 import java.io.File;
   6 import java.io.IOException;
   7 import java.io.InputStream;
   8 import java.net.MalformedURLException;
   9 import java.net.URL;
  10 import java.util.ArrayList;
  11 import java.util.HashMap;
  12 import java.util.List;
  13 import java.util.Map;
  14 import java.util.Map.Entry;
  15 import java.util.Scanner;
  16
  17 import javax.imageio.ImageIO;
  18
  19 import be.nikiroo.fanfix.Instance;
  20 import be.nikiroo.fanfix.bundles.Config;
  21 import be.nikiroo.fanfix.bundles.StringId;
  22 import be.nikiroo.fanfix.data.Chapter;
  23 import be.nikiroo.fanfix.data.MetaData;
  24 import be.nikiroo.fanfix.data.Paragraph;
  25 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
  26 import be.nikiroo.fanfix.data.Story;
  27 import be.nikiroo.utils.StringUtils;
  28
  29 /**
   30  * This class is the base class used by the other support classes. It can be
   31  * used outside of this package, and has static methods that you can use to get
   32  * access to the correct support class.
  33  * <p>
  34  * It will be used with 'resources' (usually web pages or files).
  35  *
  36  * @author niki
  37  */
  38 public abstract class BasicSupport {
  39         /**
  40          * The supported input types for which we can get a {@link BasicSupport}
  41          * object.
  42          *
  43          * @author niki
  44          */
  45         public enum SupportType {
  46                 /** EPUB files created with this program */
  47                 EPUB,
  48                 /** Pure text file with some rules */
  49                 TEXT,
  50                 /** TEXT but with associated .info file */
  51                 INFO_TEXT,
  52                 /** My Little Pony fanfictions */
  53                 FIMFICTION,
  54                 /** Fanfictions from a lot of different universes */
  55                 FANFICTION,
  56                 /** Website with lots of Mangas */
  57                 MANGAFOX,
  58                 /** Furry website with comics support */
  59                 E621,
  60                 /** CBZ files */
  61                 CBZ;
  62
  63                 /**
  64                  * A description of this support type (more information than the
  65                  * {@link BasicSupport#getSourceName()}).
  66                  *
  67                  * @return the description
  68                  */
  69                 public String getDesc() {
  70                         String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
  71                                         this.name());
  72
  73                         if (desc == null) {
  74                                 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
  75                         }
  76
  77                         return desc;
  78                 }
  79
  80                 /**
  81                  * The name of this support type (a short version).
  82                  *
  83                  * @return the name
  84                  */
  85                 public String getSourceName() {
  86                         BasicSupport support = BasicSupport.getSupport(this);
  87                         if (support != null) {
  88                                 return support.getSourceName();
  89                         }
  90
  91                         return null;
  92                 }
  93
  94                 @Override
  95                 public String toString() {
  96                         return super.toString().toLowerCase();
  97                 }
  98
  99                 /**
 100                  * Call {@link SupportType#valueOf(String.toUpperCase())}.
 101                  *
 102                  * @param typeName
 103                  *            the possible type name
 104                  *
 105                  * @return NULL or the type
 106                  */
 107                 public static SupportType valueOfUC(String typeName) {
 108                         return SupportType.valueOf(typeName == null ? null : typeName
 109                                         .toUpperCase());
 110                 }
 111
 112                 /**
 113                  * Call {@link SupportType#valueOf(String.toUpperCase())} but return
 114                  * NULL for NULL instead of raising exception.
 115                  *
 116                  * @param typeName
 117                  *            the possible type name
 118                  *
 119                  * @return NULL or the type
 120                  */
 121                 public static SupportType valueOfNullOkUC(String typeName) {
 122                         if (typeName == null) {
 123                                 return null;
 124                         }
 125
 126                         return SupportType.valueOfUC(typeName);
 127                 }
 128
 129                 /**
 130                  * Call {@link SupportType#valueOf(String.toUpperCase())} but return
 131                  * NULL in case of error instead of raising an exception.
 132                  *
 133                  * @param typeName
 134                  *            the possible type name
 135                  *
 136                  * @return NULL or the type
 137                  */
 138                 public static SupportType valueOfAllOkUC(String typeName) {
 139                         try {
 140                                 return SupportType.valueOfUC(typeName);
 141                         } catch (Exception e) {
 142                                 return null;
 143                         }
 144                 }
 145         }
 146
              // the main resource, opened by processMeta(URL, boolean, boolean)
  147         private InputStream in;
              // the support type, see getType() and setType(SupportType)
  148         private SupportType type;
  149         private URL currentReferer; // with one 'r', as in 'HTTP'...
  150
  151         // quote chars (read from the translation bundle when the object is created)
  152         private char openQuote = Instance.getTrans().getChar(
  153                         StringId.OPEN_SINGLE_QUOTE);
  154         private char closeQuote = Instance.getTrans().getChar(
  155                         StringId.CLOSE_SINGLE_QUOTE);
  156         private char openDoubleQuote = Instance.getTrans().getChar(
  157                         StringId.OPEN_DOUBLE_QUOTE);
  158         private char closeDoubleQuote = Instance.getTrans().getChar(
  159                         StringId.CLOSE_DOUBLE_QUOTE);
 160
  161         /**
  162          * The name of this support class.
               * <p>
               * This is the value that {@link SupportType#getSourceName()} returns
               * when a support is found for the type.
  163          *
  164          * @return the name
  165          */
  166         protected abstract String getSourceName();
 167
  168         /**
  169          * Check if the given resource is supported by this {@link BasicSupport}.
               * <p>
               * Only the {@link URL} is given: no stream is passed to this method.
  170          *
  171          * @param url
  172          *            the resource to check for
  173          *
  174          * @return TRUE if it is
  175          */
  176         protected abstract boolean supports(URL url);
 177
  178         /**
  179          * Return TRUE if the support will return HTML encoded content values for
  180          * the chapters content.
               * <p>
               * When TRUE, the chapter creation code of this class converts the
               * {@code <hr>} tags of the content into "* * *" lines before it splits
               * the content into paragraphs.
  181          *
  182          * @return TRUE for HTML
  183          */
  184         protected abstract boolean isHtml();
 185
              /**
               * Return the metadata of the story.
               * <p>
               * It is called by the metadata processing of this class with the main
               * resource, after the input has been reset.
               *
               * @param source
               *            the source of the story
               * @param in
               *            the input (the main resource)
               *
               * @return the metadata (the caller checks it for NULL before it looks
               *         for a cover)
               *
               * @throws IOException
               *             in case of I/O error
               */
  186         protected abstract MetaData getMeta(URL source, InputStream in)
  187                         throws IOException;
 188
  189         /**
  190          * Return the story description.
               * <p>
               * It is only called when the description is requested; the result is
               * then turned into a chapter (number 0) and stored as the resume of the
               * metadata.
  191          *
  192          * @param source
  193          *            the source of the story
  194          * @param in
  195          *            the input (the main resource)
  196          *
  197          * @return the description
  198          *
  199          * @throws IOException
  200          *             in case of I/O error
  201          */
  202         protected abstract String getDesc(URL source, InputStream in)
  203                         throws IOException;
 204
  205         /**
  206          * Return the list of chapters (name and resource).
               * <p>
               * Each entry maps the chapter name (key) to the resource of the chapter
               * (value); the chapters are processed in the order of the list.
  207          *
  208          * @param source
  209          *            the source of the story
  210          * @param in
  211          *            the input (the main resource)
  212          *
  213          * @return the chapters (a NULL list is accepted by {@link #process(URL)}
               *         and results in a story without chapters)
  214          *
  215          * @throws IOException
  216          *             in case of I/O error
  217          */
  218         protected abstract List<Entry<String, URL>> getChapters(URL source,
  219                         InputStream in) throws IOException;
 220
  221         /**
  222          * Return the content of the chapter (possibly HTML encoded, if
  223          * {@link BasicSupport#isHtml()} is TRUE).
  224          *
  225          * @param source
  226          *            the source of the story
  227          * @param in
  228          *            the input ({@link #process(URL)} passes the stream opened on
               *            the resource of the chapter here, not the main resource)
  229          * @param number
  230          *            the chapter number ({@link #process(URL)} starts at 1)
  231          *
  232          * @return the content
  233          *
  234          * @throws IOException
  235          *             in case of I/O error
  236          */
  237         protected abstract String getChapterContent(URL source, InputStream in,
  238                         int number) throws IOException;
 239
 240         /**
 241          * Return the list of cookies (values included) that must be used to
 242          * correctly fetch the resources.
 243          * <p>
 244          * You are expected to call the super method implementation if you override
 245          * it.
 246          *
 247          * @return the cookies
 248          */
 249         public Map<String, String> getCookies() {
 250                 return new HashMap<String, String>();
 251         }
 252
 253         /**
 254          * Process the given story resource into a partially filled {@link Story}
 255          * object containing the name and metadata, except for the description.
 256          *
 257          * @param url
 258          *            the story resource
 259          *
 260          * @return the {@link Story}
 261          *
 262          * @throws IOException
 263          *             in case of I/O error
 264          */
 265         public Story processMeta(URL url) throws IOException {
 266                 return processMeta(url, true, false);
 267         }
 268
 269         /**
 270          * Process the given story resource into a partially filled {@link Story}
 271          * object containing the name and metadata.
 272          *
 273          * @param url
 274          *            the story resource
 275          *
 276          * @param close
 277          *            close "this" and "in" when done
 278          *
 279          * @return the {@link Story}
 280          *
 281          * @throws IOException
 282          *             in case of I/O error
 283          */
 284         protected Story processMeta(URL url, boolean close, boolean getDesc)
 285                         throws IOException {
 286                 in = Instance.getCache().open(url, this, false);
 287                 if (in == null) {
 288                         return null;
 289                 }
 290
 291                 try {
 292                         preprocess(url, getInput());
 293
 294                         Story story = new Story();
 295                         MetaData meta = getMeta(url, getInput());
 296                         story.setMeta(meta);
 297
 298                         if (meta != null && meta.getCover() == null) {
 299                                 meta.setCover(getDefaultCover(meta.getSubject()));
 300                         }
 301
 302                         if (getDesc) {
 303                                 String descChapterName = Instance.getTrans().getString(
 304                                                 StringId.DESCRIPTION);
 305                                 story.getMeta().setResume(
 306                                                 makeChapter(url, 0, descChapterName,
 307                                                                 getDesc(url, getInput())));
 308                         }
 309
 310                         return story;
 311                 } finally {
 312                         if (close) {
 313                                 try {
 314                                         close();
 315                                 } catch (IOException e) {
 316                                         Instance.syserr(e);
 317                                 }
 318
 319                                 if (in != null) {
 320                                         in.close();
 321                                 }
 322                         }
 323                 }
 324         }
 325
 326         /**
 327          * Process the given story resource into a fully filled {@link Story}
 328          * object.
 329          *
 330          * @param url
 331          *            the story resource
 332          *
 333          * @return the {@link Story}
 334          *
 335          * @throws IOException
 336          *             in case of I/O error
 337          */
 338         public Story process(URL url) throws IOException {
 339                 setCurrentReferer(url);
 340
 341                 try {
 342                         Story story = processMeta(url, false, true);
 343                         if (story == null) {
 344                                 return null;
 345                         }
 346
 347                         story.setChapters(new ArrayList<Chapter>());
 348
 349                         List<Entry<String, URL>> chapters = getChapters(url, getInput());
 350                         int i = 1;
 351                         if (chapters != null) {
 352                                 for (Entry<String, URL> chap : chapters) {
 353                                         setCurrentReferer(chap.getValue());
 354                                         InputStream chapIn = Instance.getCache().open(
 355                                                         chap.getValue(), this, true);
 356                                         try {
 357                                                 story.getChapters().add(
 358                                                                 makeChapter(url, i, chap.getKey(),
 359                                                                                 getChapterContent(url, chapIn, i)));
 360                                         } finally {
 361                                                 chapIn.close();
 362                                         }
 363
 364                                         i++;
 365                                 }
 366                         }
 367
 368                         return story;
 369
 370                 } finally {
 371                         try {
 372                                 close();
 373                         } catch (IOException e) {
 374                                 Instance.syserr(e);
 375                         }
 376
 377                         if (in != null) {
 378                                 in.close();
 379                         }
 380
 381                         currentReferer = null;
 382                 }
 383         }
 384
  385         /**
  386          * The support type, as set by {@link #setType(SupportType)}.
  387          *
  388          * @return the type
  389          */
  390         public SupportType getType() {
  391                 return type;
  392         }
 393
  394         /**
  395          * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
  396          * the current {@link URL} we work on.
               * <p>
               * {@link #process(URL)} sets it to the story resource, then to each
               * chapter resource in turn, and resets it to NULL when it is done.
  397          *
  398          * @return the referer
  399          */
  400         public URL getCurrentReferer() {
  401                 return currentReferer;
  402         }
 403
  404         /**
  405          * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
  406          * the current {@link URL} we work on.
  407          *
  408          * @param currentReferer
  409          *            the new referer
  410          */
  411         protected void setCurrentReferer(URL currentReferer) {
  412                 this.currentReferer = currentReferer;
  413         }
 414
  415         /**
  416          * The support type.
  417          *
  418          * @param type
  419          *            the new type
  420          *
  421          * @return this (so the call can be chained)
  422          */
  423         protected BasicSupport setType(SupportType type) {
  424                 this.type = type;
  425                 return this;
  426         }
 427
  428         /**
  429          * Prepare the support if needed before processing.
               * <p>
               * It is called once the main resource has been opened, before the
               * metadata is read. This base implementation does nothing.
  430          *
  431          * @param source
  432          *            the source of the story
  433          * @param in
  434          *            the input (the main resource)
  435          *
  436          * @throws IOException
  437          *             on I/O error
  438          */
  439         protected void preprocess(URL source, InputStream in) throws IOException {
                      // nothing to prepare by default
  440         }
 441
  442         /**
  443          * Now that we have processed the {@link Story}, close the resources if any.
               * <p>
               * The processing methods of this class call it from a finally block and
               * report the {@link IOException} it may throw instead of propagating it.
               * This base implementation does nothing.
  444          *
  445          * @throws IOException
  446          *             on I/O error
  447          */
  448         protected void close() throws IOException {
                      // no resource to release by default
  449         }
 450
  451         /**
  452          * Create a {@link Chapter} object from the given information, formatting
  453          * the content as it should be.
               * <p>
               * The name is cleaned from a possible redundant "Chapter x:" prefix; the
               * content is split into paragraphs (on new lines and on closing
               * paragraph tags), the lines enclosed in square brackets that resolve to
               * an image become image paragraphs, and the superfluous blank/break
               * paragraphs are removed.
  454          *
               * @param source
               *            the source of the story (used to resolve the image lines)
  455          * @param number
  456          *            the chapter number
  457          * @param name
  458          *            the chapter name
  459          * @param content
  460          *            the chapter content (NULL for a chapter without paragraphs)
  461          *
  462          * @return the {@link Chapter}
  463          *
  464          * @throws IOException
  465          *             in case of I/O error
  466          */
  467         protected Chapter makeChapter(URL source, int number, String name,
  468                         String content) throws IOException {
  469
  470                 // Chapter name: process it correctly, then remove the possible
  471                 // redundant "Chapter x: " in front of it
  472                 String chapterName = processPara(name).getContent().trim();
                      // The CHAPTER configuration value is a comma-separated list of keys;
                      // each key gives the word to strip (only the first match is removed)
  473                 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
  474                                 .split(",")) {
  475                         String chapterWord = Instance.getConfig().getStringX(
  476                                         Config.CHAPTER, lang);
  477                         if (chapterName.startsWith(chapterWord)) {
  478                                 chapterName = chapterName.substring(chapterWord.length())
  479                                                 .trim();
  480                                 break;
  481                         }
  482                 }
  483
                      // then the chapter number itself...
  484                 if (chapterName.startsWith(Integer.toString(number))) {
  485                         chapterName = chapterName.substring(
  486                                         Integer.toString(number).length()).trim();
  487                 }
  488
                      // ...and the separator that followed it
  489                 if (chapterName.startsWith(":")) {
  490                         chapterName = chapterName.substring(1).trim();
  491                 }
  492                 //
  493
  494                 Chapter chap = new Chapter(number, chapterName);
  495
                      // No content: the chapter is returned without any paragraph set
  496                 if (content == null) {
  497                         return chap;
  498                 }
  499
  500                 if (isHtml()) {
  501                         // Special <HR> processing:
                              // NOTE(review): the pattern is case-sensitive, so an upper case
                              // <HR> is not converted here -- confirm this is intended
  502                         content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
  503                                         "\n* * *\n");
  504                 }
  505
                      // NOTE: this local "in" shadows the field of the same name
  506                 InputStream in = new ByteArrayInputStream(content.getBytes("UTF-8"));
  507                 try {
                              // the scanner is not closed by itself: the stream it reads is
                              // closed in the finally block below
  508                         @SuppressWarnings("resource")
  509                         Scanner scan = new Scanner(in, "UTF-8");
  510                         scan.useDelimiter("(\\n|</p>)"); // \n for test, </p> for html
  511
  512                         List<Paragraph> paras = new ArrayList<Paragraph>();
  513                         while (scan.hasNext()) {
  514                                 String line = scan.next().trim();
  515                                 boolean image = false;
                                      // "[resource]" lines are image candidates; they are kept as
                                      // normal text when the resource cannot be resolved
  516                                 if (line.startsWith("[") && line.endsWith("]")) {
  517                                         URL url = getImageUrl(this, source,
  518                                                         line.substring(1, line.length() - 1).trim());
  519                                         if (url != null) {
  520                                                 paras.add(new Paragraph(url));
  521                                                 image = true;
  522                                         }
  523                                 }
  524
  525                                 if (!image) {
  526                                         paras.add(processPara(line));
  527                                 }
  528                         }
  529
  530                         // Check quotes for "bad" format
  531                         List<Paragraph> newParas = new ArrayList<Paragraph>();
  532                         for (Paragraph para : paras) {
  533                                 newParas.addAll(requotify(para));
  534                         }
  535                         paras = newParas;
  536
  537                         // Remove double blanks/brks
                              // "space"/"brk" describe the previous paragraph seen by the loop;
                              // "brk" starts at true so that a leading blank/break is dropped
  538                         boolean space = false;
  539                         boolean brk = true;
  540                         for (int i = 0; i < paras.size(); i++) {
  541                                 Paragraph para = paras.get(i);
  542                                 boolean thisSpace = para.getType() == ParagraphType.BLANK;
  543                                 boolean thisBrk = para.getType() == ParagraphType.BREAK;
  544
  545                                 if (space && thisBrk) {
                                              // a break wins over the blank seen just before it
  546                                         paras.remove(i - 1);
  547                                         i--;
  548                                 } else if ((space || brk) && (thisSpace || thisBrk)) {
                                              // second blank/break in a row: drop the current one
  549                                         paras.remove(i);
  550                                         i--;
  551                                 }
  552
  553                                 space = thisSpace;
  554                                 brk = thisBrk;
  555                         }
  556
  557                         // Remove blank/brk at start
  558                         if (paras.size() > 0
  559                                         && (paras.get(0).getType() == ParagraphType.BLANK || paras
  560                                                         .get(0).getType() == ParagraphType.BREAK)) {
  561                                 paras.remove(0);
  562                         }
  563
  564                         // Remove blank/brk at end
                              // ("last" is computed after the possible removal at start)
  565                         int last = paras.size() - 1;
  566                         if (paras.size() > 0
  567                                         && (paras.get(last).getType() == ParagraphType.BLANK || paras
  568                                                         .get(last).getType() == ParagraphType.BREAK)) {
  569                                 paras.remove(last);
  570                         }
  571
  572                         chap.setParagraphs(paras);
  573
  574                         return chap;
  575                 } finally {
  576                         in.close();
  577                 }
  578         }
 579
 580         static BufferedImage getDefaultCover(String subject) {
 581                 if (subject != null && !subject.isEmpty()
 582                                 && Instance.getCoverDir() != null) {
 583                         try {
 584                                 File fileCover = new File(Instance.getCoverDir(), subject);
 585                                 return getImage(null, fileCover.toURI().toURL(), subject);
 586                         } catch (MalformedURLException e) {
 587                         }
 588                 }
 589
 590                 return null;
 591         }
 592
 593         /**
 594          * Return the list of supported image extensions.
 595          *
 596          * @return the extensions
 597          */
 598         static String[] getImageExt(boolean emptyAllowed) {
 599                 if (emptyAllowed) {
 600                         return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 601                 } else {
 602                         return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 603                 }
 604         }
 605
 606         static BufferedImage getImage(BasicSupport support, URL source, String line) {
 607                 URL url = getImageUrl(support, source, line);
 608                 if (url != null) {
 609                         InputStream in = null;
 610                         try {
 611                                 in = Instance.getCache().open(url, getSupport(url), true);
 612                                 return ImageIO.read(in);
 613                         } catch (IOException e) {
 614                         } finally {
 615                                 if (in != null) {
 616                                         try {
 617                                                 in.close();
 618                                         } catch (IOException e) {
 619                                         }
 620                                 }
 621                         }
 622                 }
 623
 624                 return null;
 625         }
 626
 627         /**
 628          * Check if the given resource can be a local image or a remote image, then
 629          * refresh the cache with it if it is.
 630          *
 631          * @param source
 632          *            the story source
 633          * @param line
 634          *            the resource to check
 635          *
 636          * @return the image URL if found, or NULL
 637          *
 638          */
 639         static URL getImageUrl(BasicSupport support, URL source, String line) {
 640                 URL url = null;
 641
 642                 if (line != null) {
 643                         // try for files
 644                         String path = null;
 645                         if (source != null) {
 646                                 path = new File(source.getFile()).getParent();
 647                                 try {
 648                                         String basePath = new File(new File(path), line.trim())
 649                                                         .getAbsolutePath();
 650                                         for (String ext : getImageExt(true)) {
 651                                                 if (new File(basePath + ext).exists()) {
 652                                                         url = new File(basePath + ext).toURI().toURL();
 653                                                 }
 654                                         }
 655                                 } catch (Exception e) {
 656                                         // Nothing to do here
 657                                 }
 658                         }
 659
 660                         if (url == null) {
 661                                 // try for URLs
 662                                 try {
 663                                         for (String ext : getImageExt(true)) {
 664                                                 if (Instance.getCache().check(new URL(line + ext))) {
 665                                                         url = new URL(line + ext);
 666                                                         break;
 667                                                 }
 668                                         }
 669
 670                                         // try out of cache
 671                                         if (url == null) {
 672                                                 for (String ext : getImageExt(true)) {
 673                                                         try {
 674                                                                 url = new URL(line + ext);
 675                                                                 Instance.getCache().refresh(url, support, true);
 676                                                                 break;
 677                                                         } catch (IOException e) {
 678                                                                 // no image with this ext
 679                                                                 url = null;
 680                                                         }
 681                                                 }
 682                                         }
 683                                 } catch (MalformedURLException e) {
 684                                         // Not an url
 685                                 }
 686                         }
 687
 688                         // refresh the cached file
 689                         if (url != null) {
 690                                 try {
 691                                         Instance.getCache().refresh(url, support, true);
 692                                 } catch (IOException e) {
 693                                         // woops, broken image
 694                                         url = null;
 695                                 }
 696                         }
 697                 }
 698
 699                 return url;
 700         }
 701
 702         protected InputStream reset(InputStream in) {
 703                 try {
 704                         in.reset();
 705                 } catch (IOException e) {
 706                 }
 707                 return in;
 708         }
 709
 710         /**
 711          * Reset then return {@link BasicSupport#in}.
 712          *
 713          * @return {@link BasicSupport#in}
 714          */
 715         protected InputStream getInput() {
 716                 return reset(in);
 717         }
 718
 719         /**
 720          * Fix the author name if it is prefixed with some "by" {@link String}.
 721          *
 722          * @param author
 723          *            the author with a possible prefix
 724          *
 725          * @return the author without prefixes
 726          */
 727         protected String fixAuthor(String author) {
 728                 if (author != null) {
 729                         for (String suffix : new String[] { " ", ":" }) {
 730                                 for (String byString : Instance.getConfig()
 731                                                 .getString(Config.BYS).split(",")) {
 732                                         byString += suffix;
 733                                         if (author.toUpperCase().startsWith(byString.toUpperCase())) {
 734                                                 author = author.substring(byString.length()).trim();
 735                                         }
 736                                 }
 737                         }
 738
 739                         // Special case (without suffix):
 740                         if (author.startsWith("©")) {
 741                                 author = author.substring(1);
 742                         }
 743                 }
 744
 745                 return author;
 746         }
 747
 748         /**
 749          * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
 750          * and requotify them (i.e., separate them into QUOTE paragraphs and other
 751          * paragraphs (quotes or not)).
 752          *
 753          * @param para
 754          *            the paragraph to requotify (not necessaraly a quote)
 755          *
 756          * @return the correctly (or so we hope) quotified paragraphs
 757          */
 758         private List<Paragraph> requotify(Paragraph para) {
 759                 List<Paragraph> newParas = new ArrayList<Paragraph>();
 760
 761                 if (para.getType() == ParagraphType.QUOTE
 762                                 && para.getContent().length() > 2) {
 763                         String line = para.getContent();
 764                         boolean singleQ = line.startsWith("" + openQuote);
 765                         boolean doubleQ = line.startsWith("" + openDoubleQuote);
 766
 767                         if (!singleQ && !doubleQ) {
 768                                 line = openDoubleQuote + line + closeDoubleQuote;
 769                                 newParas.add(new Paragraph(ParagraphType.QUOTE, line));
 770                         } else {
 771                                 char open = singleQ ? openQuote : openDoubleQuote;
 772                                 char close = singleQ ? closeQuote : closeDoubleQuote;
 773
 774                                 int posDot = -1;
 775                                 boolean inQuote = false;
 776                                 int i = 0;
 777                                 for (char car : line.toCharArray()) {
 778                                         if (car == open) {
 779                                                 inQuote = true;
 780                                         } else if (car == close) {
 781                                                 inQuote = false;
 782                                         } else if (car == '.' && !inQuote) {
 783                                                 posDot = i;
 784                                                 break;
 785                                         }
 786                                         i++;
 787                                 }
 788
 789                                 if (posDot >= 0) {
 790                                         String rest = line.substring(posDot + 1).trim();
 791                                         line = line.substring(0, posDot + 1).trim();
 792                                         newParas.add(new Paragraph(ParagraphType.QUOTE, line));
 793                                         if (!rest.isEmpty()) {
 794                                                 newParas.addAll(requotify(processPara(rest)));
 795                                         }
 796                                 } else {
 797                                         newParas.add(para);
 798                                 }
 799                         }
 800                 } else {
 801                         newParas.add(para);
 802                 }
 803
 804                 return newParas;
 805         }
 806
 807         /**
 808          * Process a {@link Paragraph} from a raw line of text.
 809          * <p>
 810          * Will also fix quotes and HTML encoding if needed.
 811          *
 812          * @param line
 813          *            the raw line
 814          *
 815          * @return the processed {@link Paragraph}
 816          */
 817         private Paragraph processPara(String line) {
 818                 line = ifUnhtml(line).trim();
 819
 820                 boolean space = true;
 821                 boolean brk = true;
 822                 boolean quote = false;
 823                 boolean tentativeCloseQuote = false;
 824                 char prev = '\0';
 825                 int dashCount = 0;
 826
 827                 StringBuilder builder = new StringBuilder();
 828                 for (char car : line.toCharArray()) {
 829                         if (car != '-') {
 830                                 if (dashCount > 0) {
 831                                         // dash, ndash and mdash: - – —
 832                                         // currently: always use mdash
 833                                         builder.append(dashCount == 1 ? '-' : '—');
 834                                 }
 835                                 dashCount = 0;
 836                         }
 837
 838                         if (tentativeCloseQuote) {
 839                                 tentativeCloseQuote = false;
 840                                 if ((car >= 'a' && car <= 'z') || (car >= 'A' && car <= 'Z')
 841                                                 || (car >= '0' && car <= '9')) {
 842                                         builder.append("'");
 843                                 } else {
 844                                         builder.append(closeQuote);
 845                                 }
 846                         }
 847
 848                         switch (car) {
 849                         case ' ': // note: unbreakable space
 850                         case ' ':
 851                         case '\t':
 852                         case '\n': // just in case
 853                         case '\r': // just in case
 854                                 builder.append(' ');
 855                                 break;
 856
 857                         case '\'':
 858                                 if (space || (brk && quote)) {
 859                                         quote = true;
 860                                         builder.append(openQuote);
 861                                 } else if (prev == ' ') {
 862                                         builder.append(openQuote);
 863                                 } else {
 864                                         // it is a quote ("I'm off") or a 'quote' ("This
 865                                         // 'good' restaurant"...)
 866                                         tentativeCloseQuote = true;
 867                                 }
 868                                 break;
 869
 870                         case '"':
 871                                 if (space || (brk && quote)) {
 872                                         quote = true;
 873                                         builder.append(openDoubleQuote);
 874                                 } else if (prev == ' ') {
 875                                         builder.append(openDoubleQuote);
 876                                 } else {
 877                                         builder.append(closeDoubleQuote);
 878                                 }
 879                                 break;
 880
 881                         case '-':
 882                                 if (space) {
 883                                         quote = true;
 884                                 } else {
 885                                         dashCount++;
 886                                 }
 887                                 space = false;
 888                                 break;
 889
 890                         case '*':
 891                         case '~':
 892                         case '/':
 893                         case '\\':
 894                         case '<':
 895                         case '>':
 896                         case '=':
 897                         case '+':
 898                         case '_':
 899                         case '–':
 900                         case '—':
 901                                 space = false;
 902                                 builder.append(car);
 903                                 break;
 904
 905                         case '‘':
 906                         case '`':
 907                         case '‹':
 908                         case '﹁':
 909                         case '〈':
 910                         case '「':
 911                                 if (space || (brk && quote)) {
 912                                         quote = true;
 913                                         builder.append(openQuote);
 914                                 } else {
 915                                         builder.append(openQuote);
 916                                 }
 917                                 space = false;
 918                                 brk = false;
 919                                 break;
 920
 921                         case '’':
 922                         case '›':
 923                         case '﹂':
 924                         case '〉':
 925                         case '」':
 926                                 space = false;
 927                                 brk = false;
 928                                 builder.append(closeQuote);
 929                                 break;
 930
 931                         case '«':
 932                         case '“':
 933                         case '﹃':
 934                         case '《':
 935                         case '『':
 936                                 if (space || (brk && quote)) {
 937                                         quote = true;
 938                                         builder.append(openDoubleQuote);
 939                                 } else {
 940                                         builder.append(openDoubleQuote);
 941                                 }
 942                                 space = false;
 943                                 brk = false;
 944                                 break;
 945
 946                         case '»':
 947                         case '”':
 948                         case '﹄':
 949                         case '》':
 950                         case '』':
 951                                 space = false;
 952                                 brk = false;
 953                                 builder.append(closeDoubleQuote);
 954                                 break;
 955
 956                         default:
 957                                 space = false;
 958                                 brk = false;
 959                                 builder.append(car);
 960                                 break;
 961                         }
 962
 963                         prev = car;
 964                 }
 965
 966                 if (tentativeCloseQuote) {
 967                         tentativeCloseQuote = false;
 968                         builder.append(closeQuote);
 969                 }
 970
 971                 line = builder.toString().trim();
 972
 973                 ParagraphType type = ParagraphType.NORMAL;
 974                 if (space) {
 975                         type = ParagraphType.BLANK;
 976                 } else if (brk) {
 977                         type = ParagraphType.BREAK;
 978                 } else if (quote) {
 979                         type = ParagraphType.QUOTE;
 980                 }
 981
 982                 return new Paragraph(type, line);
 983         }
 984
 985         /**
 986          * Remove the HTML from the inpit <b>if</b> {@link BasicSupport#isHtml()} is
 987          * true.
 988          *
 989          * @param input
 990          *            the input
 991          *
 992          * @return the no html version if needed
 993          */
 994         private String ifUnhtml(String input) {
 995                 if (isHtml() && input != null) {
 996                         return StringUtils.unhtml(input);
 997                 }
 998
 999                 return input;
1000         }
1001
1002         /**
1003          * Return a {@link BasicSupport} implementation supporting the given
1004          * resource if possible.
1005          *
1006          * @param url
1007          *            the story resource
1008          *
1009          * @return an implementation that supports it, or NULL
1010          */
1011         public static BasicSupport getSupport(URL url) {
1012                 if (url == null) {
1013                         return null;
1014                 }
1015
1016                 // TEXT and INFO_TEXT always support files (not URLs though)
1017                 for (SupportType type : SupportType.values()) {
1018                         if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1019                                 BasicSupport support = getSupport(type);
1020                                 if (support != null && support.supports(url)) {
1021                                         return support;
1022                                 }
1023                         }
1024                 }
1025
1026                 for (SupportType type : new SupportType[] { SupportType.TEXT,
1027                                 SupportType.INFO_TEXT }) {
1028                         BasicSupport support = getSupport(type);
1029                         if (support != null && support.supports(url)) {
1030                                 return support;
1031                         }
1032                 }
1033
1034                 return null;
1035         }
1036
1037         /**
1038          * Return a {@link BasicSupport} implementation supporting the given type.
1039          *
1040          * @param type
1041          *            the type
1042          *
1043          * @return an implementation that supports it, or NULL
1044          */
1045         public static BasicSupport getSupport(SupportType type) {
1046                 switch (type) {
1047                 case EPUB:
1048                         return new Epub().setType(type);
1049                 case INFO_TEXT:
1050                         return new InfoText().setType(type);
1051                 case FIMFICTION:
1052                         return new Fimfiction().setType(type);
1053                 case FANFICTION:
1054                         return new Fanfiction().setType(type);
1055                 case TEXT:
1056                         return new Text().setType(type);
1057                 case MANGAFOX:
1058                         return new MangaFox().setType(type);
1059                 case E621:
1060                         return new E621().setType(type);
1061                 case CBZ:
1062                         return new Cbz().setType(type);
1063                 }
1064
1065                 return null;
1066         }
1067
1068         /**
1069          * Return the first line from the given input which correspond to the given
1070          * selectors.
1071          *
1072          * @param in
1073          *            the input
1074          * @param needle
1075          *            a string that must be found inside the target line (also
1076          *            supports "^" at start to say "only if it starts with" the
1077          *            needle)
1078          * @param relativeLine
1079          *            the line to return based upon the target line position (-1 =
1080          *            the line before, 0 = the target line...)
1081          *
1082          * @return the line
1083          */
1084         static String getLine(InputStream in, String needle, int relativeLine) {
1085                 return getLine(in, needle, relativeLine, true);
1086         }
1087
1088         /**
1089          * Return a line from the given input which correspond to the given
1090          * selectors.
1091          *
1092          * @param in
1093          *            the input
1094          * @param needle
1095          *            a string that must be found inside the target line (also
1096          *            supports "^" at start to say "only if it starts with" the
1097          *            needle)
1098          * @param relativeLine
1099          *            the line to return based upon the target line position (-1 =
1100          *            the line before, 0 = the target line...)
1101          * @param first
1102          *            takes the first result (as opposed to the last one, which will
1103          *            also always spend the input)
1104          *
1105          * @return the line
1106          */
1107         static String getLine(InputStream in, String needle, int relativeLine,
1108                         boolean first) {
1109                 String rep = null;
1110
1111                 try {
1112                         in.reset();
1113                 } catch (IOException e) {
1114                         Instance.syserr(e);
1115                 }
1116
1117                 List<String> lines = new ArrayList<String>();
1118                 @SuppressWarnings("resource")
1119                 Scanner scan = new Scanner(in, "UTF-8");
1120                 int index = -1;
1121                 scan.useDelimiter("\\n");
1122                 while (scan.hasNext()) {
1123                         lines.add(scan.next());
1124
1125                         if (index == -1) {
1126                                 if (needle.startsWith("^")) {
1127                                         if (lines.get(lines.size() - 1).startsWith(
1128                                                         needle.substring(1))) {
1129                                                 index = lines.size() - 1;
1130                                         }
1131
1132                                 } else {
1133                                         if (lines.get(lines.size() - 1).contains(needle)) {
1134                                                 index = lines.size() - 1;
1135                                         }
1136                                 }
1137                         }
1138
1139                         if (index >= 0 && index + relativeLine < lines.size()) {
1140                                 rep = lines.get(index + relativeLine);
1141                                 if (first) {
1142                                         break;
1143                                 }
1144                         }
1145                 }
1146
1147                 return rep;
1148         }
1149 }