BasicSupport.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.awt.image.BufferedImage;
   4 import java.io.ByteArrayInputStream;
   5 import java.io.File;
   6 import java.io.IOException;
   7 import java.io.InputStream;
   8 import java.net.MalformedURLException;
   9 import java.net.URL;
  10 import java.util.ArrayList;
  11 import java.util.HashMap;
  12 import java.util.List;
  13 import java.util.Map;
  14 import java.util.Map.Entry;
  15 import java.util.Scanner;
  16
  17 import javax.imageio.ImageIO;
  18
  19 import be.nikiroo.fanfix.Instance;
  20 import be.nikiroo.fanfix.bundles.Config;
  21 import be.nikiroo.fanfix.bundles.StringId;
  22 import be.nikiroo.fanfix.data.Chapter;
  23 import be.nikiroo.fanfix.data.MetaData;
  24 import be.nikiroo.fanfix.data.Paragraph;
  25 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
  26 import be.nikiroo.fanfix.data.Story;
  27 import be.nikiroo.utils.StringUtils;
  28
  29 /**
   30  * This class is the base class used by the other support classes. It can be
   31  * used outside of this package, and has static methods that you can use to
   32  * get access to the correct support class.
  33  * <p>
  34  * It will be used with 'resources' (usually web pages or files).
  35  *
  36  * @author niki
  37  */
  38 public abstract class BasicSupport {
  39         /**
  40          * The supported input types for which we can get a {@link BasicSupport}
  41          * object.
  42          *
  43          * @author niki
  44          */
  45         public enum SupportType {
  46                 /** EPUB files created with this program */
  47                 EPUB,
  48                 /** Pure text file with some rules */
  49                 TEXT,
  50                 /** TEXT but with associated .info file */
  51                 INFO_TEXT,
  52                 /** My Little Pony fanfictions */
  53                 FIMFICTION,
  54                 /** Fanfictions from a lot of different universes */
  55                 FANFICTION,
  56                 /** Website with lots of Mangas */
  57                 MANGAFOX,
  58                 /** Furry website with comics support */
  59                 E621,
  60                 /** CBZ files */
  61                 CBZ;
  62
  63                 /**
  64                  * A description of this support type (more information than the
  65                  * {@link BasicSupport#getSourceName()}).
  66                  *
  67                  * @return the description
  68                  */
  69                 public String getDesc() {
  70                         String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
  71                                         this.name());
  72
  73                         if (desc == null) {
  74                                 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
  75                         }
  76
  77                         return desc;
  78                 }
  79
  80                 /**
  81                  * The name of this support type (a short version).
  82                  *
  83                  * @return the name
  84                  */
  85                 public String getSourceName() {
  86                         BasicSupport support = BasicSupport.getSupport(this);
  87                         if (support != null) {
  88                                 return support.getSourceName();
  89                         }
  90
  91                         return null;
  92                 }
  93
  94                 @Override
  95                 public String toString() {
  96                         return super.toString().toLowerCase();
  97                 }
  98
  99                 /**
 100                  * Call {@link SupportType#valueOf(String.toUpperCase())}.
 101                  *
 102                  * @param typeName
 103                  *            the possible type name
 104                  *
 105                  * @return NULL or the type
 106                  */
 107                 public static SupportType valueOfUC(String typeName) {
 108                         return SupportType.valueOf(typeName == null ? null : typeName
 109                                         .toUpperCase());
 110                 }
 111
 112                 /**
 113                  * Call {@link SupportType#valueOf(String.toUpperCase())} but return
 114                  * NULL for NULL instead of raising exception.
 115                  *
 116                  * @param typeName
 117                  *            the possible type name
 118                  *
 119                  * @return NULL or the type
 120                  */
 121                 public static SupportType valueOfNullOkUC(String typeName) {
 122                         if (typeName == null) {
 123                                 return null;
 124                         }
 125
 126                         return SupportType.valueOfUC(typeName);
 127                 }
 128
 129                 /**
 130                  * Call {@link SupportType#valueOf(String.toUpperCase())} but return
 131                  * NULL in case of error instead of raising an exception.
 132                  *
 133                  * @param typeName
 134                  *            the possible type name
 135                  *
 136                  * @return NULL or the type
 137                  */
 138                 public static SupportType valueOfAllOkUC(String typeName) {
 139                         try {
 140                                 return SupportType.valueOfUC(typeName);
 141                         } catch (Exception e) {
 142                                 return null;
 143                         }
 144                 }
 145         }
 146
  147         private InputStream in; // main resource, (re)opened by processMeta(URL, boolean, boolean)
  148         private SupportType type; // assigned through setType(SupportType)
  149         private URL currentReferer; // with one 'r', as in the HTTP "Referer" header
  150
  151         // quote characters, read from the translation bundle
  152         private char openQuote = Instance.getTrans().getChar(
  153                         StringId.OPEN_SINGLE_QUOTE);
  154         private char closeQuote = Instance.getTrans().getChar(
  155                         StringId.CLOSE_SINGLE_QUOTE);
  156         private char openDoubleQuote = Instance.getTrans().getChar(
  157                         StringId.OPEN_DOUBLE_QUOTE);
  158         private char closeDoubleQuote = Instance.getTrans().getChar(
  159                         StringId.CLOSE_DOUBLE_QUOTE);
 160
  161         /**
  162          * The name of this support class.
               * <p>
               * {@link SupportType#getSourceName()} returns this value for the
               * support obtained from the corresponding type.
  163          *
  164          * @return the name
  165          */
  166         protected abstract String getSourceName();
 167
  168         /**
  169          * Check if the given resource is supported by this {@link BasicSupport}.
               * <p>
               * Only the {@link URL} is given: no content of the resource is passed
               * to this method.
  170          *
  171          * @param url
  172          *            the resource to check for
  173          *
  174          * @return TRUE if it is
  175          */
  176         protected abstract boolean supports(URL url);
 177
  178         /**
  179          * Return TRUE if the support will return HTML encoded content values for
  180          * the chapters content.
               * <p>
               * When TRUE, the chapter creation code converts the "hr" tags of the
               * content into "* * *" lines before splitting it into paragraphs.
  181          *
  182          * @return TRUE for HTML
  183          */
  184         protected abstract boolean isHtml();
 185
              /**
               * Return the metadata of the story.
               * <p>
               * The result is stored as-is in the {@link Story} when processing the
               * metadata; if it has no cover, a default cover is looked up from its
               * subject. A NULL result is checked for by the caller before the cover
               * is accessed.
               *
               * @param source
               *            the source of the story
               * @param in
               *            the input (the main resource)
               *
               * @return the metadata
               *
               * @throws IOException
               *             in case of I/O error
               */
  186         protected abstract MetaData getMeta(URL source, InputStream in)
  187                         throws IOException;
 188
  189         /**
  190          * Return the story description.
               * <p>
               * It is only requested when the metadata is processed with the
               * description enabled; the result is then turned into the chapter
               * number 0 (the resume of the story).
  191          *
  192          * @param source
  193          *            the source of the story
  194          * @param in
  195          *            the input (the main resource)
  196          *
  197          * @return the description
  198          *
  199          * @throws IOException
  200          *             in case of I/O error
  201          */
  202         protected abstract String getDesc(URL source, InputStream in)
  203                         throws IOException;
 204
  205         /**
  206          * Return the list of chapters (name and resource).
               * <p>
               * The chapters are processed in the order of the list and numbered
               * from 1.
  207          *
  208          * @param source
  209          *            the source of the story
  210          * @param in
  211          *            the input (the main resource)
  212          *
  213          * @return the chapters (NULL is processed as "no chapters")
  214          *
  215          * @throws IOException
  216          *             in case of I/O error
  217          */
  218         protected abstract List<Entry<String, URL>> getChapters(URL source,
  219                         InputStream in) throws IOException;
 220
  221         /**
  222          * Return the content of the chapter (possibly HTML encoded, if
  223          * {@link BasicSupport#isHtml()} is TRUE).
  224          *
  225          * @param source
  226          *            the source of the story
  227          * @param in
  228          *            the input (the chapter resource, opened from the chapter URL)
  229          * @param number
  230          *            the chapter number (the first chapter is number 1)
  231          *
  232          * @return the content
  233          *
  234          * @throws IOException
  235          *             in case of I/O error
  236          */
  237         protected abstract String getChapterContent(URL source, InputStream in,
  238                         int number) throws IOException;
 239
 240         /**
 241          * Return the list of cookies (values included) that must be used to
 242          * correctly fetch the resources.
 243          * <p>
 244          * You are expected to call the super method implementation if you override
 245          * it.
 246          *
 247          * @return the cookies
 248          */
 249         public Map<String, String> getCookies() {
 250                 return new HashMap<String, String>();
 251         }
 252
 253         /**
 254          * Process the given story resource into a partially filled {@link Story}
 255          * object containing the name and metadata, except for the description.
 256          *
 257          * @param url
 258          *            the story resource
 259          *
 260          * @return the {@link Story}
 261          *
 262          * @throws IOException
 263          *             in case of I/O error
 264          */
 265         public Story processMeta(URL url) throws IOException {
 266                 return processMeta(url, true, false);
 267         }
 268
 269         /**
 270          * Process the given story resource into a partially filled {@link Story}
 271          * object containing the name and metadata.
 272          *
 273          * @param url
 274          *            the story resource
 275          *
 276          * @param close
 277          *            close "this" and "in" when done
 278          *
 279          * @return the {@link Story}
 280          *
 281          * @throws IOException
 282          *             in case of I/O error
 283          */
 284         protected Story processMeta(URL url, boolean close, boolean getDesc)
 285                         throws IOException {
 286                 in = Instance.getCache().open(url, this, false);
 287                 if (in == null) {
 288                         return null;
 289                 }
 290
 291                 try {
 292                         preprocess(url, getInput());
 293
 294                         Story story = new Story();
 295                         MetaData meta = getMeta(url, getInput());
 296                         story.setMeta(meta);
 297
 298                         if (meta != null && meta.getCover() == null) {
 299                                 meta.setCover(getDefaultCover(meta.getSubject()));
 300                         }
 301
 302                         if (getDesc) {
 303                                 String descChapterName = Instance.getTrans().getString(
 304                                                 StringId.DESCRIPTION);
 305                                 story.getMeta().setResume(
 306                                                 makeChapter(url, 0, descChapterName,
 307                                                                 getDesc(url, getInput())));
 308                         }
 309
 310                         return story;
 311                 } finally {
 312                         if (close) {
 313                                 try {
 314                                         close();
 315                                 } catch (IOException e) {
 316                                         Instance.syserr(e);
 317                                 }
 318
 319                                 if (in != null) {
 320                                         in.close();
 321                                 }
 322                         }
 323                 }
 324         }
 325
 326         /**
 327          * Process the given story resource into a fully filled {@link Story}
 328          * object.
 329          *
 330          * @param url
 331          *            the story resource
 332          *
 333          * @return the {@link Story}
 334          *
 335          * @throws IOException
 336          *             in case of I/O error
 337          */
 338         public Story process(URL url) throws IOException {
 339                 setCurrentReferer(url);
 340
 341                 try {
 342                         Story story = processMeta(url, false, true);
 343                         if (story == null) {
 344                                 return null;
 345                         }
 346
 347                         story.setChapters(new ArrayList<Chapter>());
 348
 349                         List<Entry<String, URL>> chapters = getChapters(url, getInput());
 350                         int i = 1;
 351                         if (chapters != null) {
 352                                 for (Entry<String, URL> chap : chapters) {
 353                                         setCurrentReferer(chap.getValue());
 354                                         InputStream chapIn = Instance.getCache().open(
 355                                                         chap.getValue(), this, true);
 356                                         try {
 357                                                 story.getChapters().add(
 358                                                                 makeChapter(url, i, chap.getKey(),
 359                                                                                 getChapterContent(url, chapIn, i)));
 360                                         } finally {
 361                                                 chapIn.close();
 362                                         }
 363                                         i++;
 364                                 }
 365                         }
 366
 367                         return story;
 368
 369                 } finally {
 370                         try {
 371                                 close();
 372                         } catch (IOException e) {
 373                                 Instance.syserr(e);
 374                         }
 375
 376                         if (in != null) {
 377                                 in.close();
 378                         }
 379
 380                         currentReferer = null;
 381                 }
 382         }
 383
 384         /**
 385          * The support type.$
 386          *
 387          * @return the type
 388          */
 389         public SupportType getType() {
 390                 return type;
 391         }
 392
 393         /**
 394          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 395          * the current {@link URL} we work on.
 396          *
 397          * @return the referer
 398          */
 399         public URL getCurrentReferer() {
 400                 return currentReferer;
 401         }
 402
 403         /**
 404          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 405          * the current {@link URL} we work on.
 406          *
 407          * @param currentReferer
 408          *            the new referer
 409          */
 410         protected void setCurrentReferer(URL currentReferer) {
 411                 this.currentReferer = currentReferer;
 412         }
 413
 414         /**
 415          * The support type.
 416          *
 417          * @param type
 418          *            the new type
 419          *
 420          * @return this
 421          */
 422         protected BasicSupport setType(SupportType type) {
 423                 this.type = type;
 424                 return this;
 425         }
 426
 427         /**
 428          * Prepare the support if needed before processing.
 429          *
 430          * @param source
 431          *            the source of the story
 432          * @param in
 433          *            the input (the main resource)
 434          *
 435          * @throws IOException
 436          *             on I/O error
 437          */
 438         protected void preprocess(URL source, InputStream in) throws IOException {
 439         }
 440
 441         /**
 442          * Now that we have processed the {@link Story}, close the resources if any.
 443          *
 444          * @throws IOException
 445          *             on I/O error
 446          */
 447         protected void close() throws IOException {
 448         }
 449
 450         /**
 451          * Create a {@link Chapter} object from the given information, formatting
 452          * the content as it should be.
 453          *
 454          * @param number
 455          *            the chapter number
 456          * @param name
 457          *            the chapter name
 458          * @param content
 459          *            the chapter content
 460          *
 461          * @return the {@link Chapter}
 462          *
 463          * @throws IOException
 464          *             in case of I/O error
 465          */
 466         protected Chapter makeChapter(URL source, int number, String name,
 467                         String content) throws IOException {
 468
 469                 // Chapter name: process it correctly, then remove the possible
 470                 // redundant "Chapter x: " in front of it
 471                 String chapterName = processPara(name).getContent().trim();
 472                 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
 473                                 .split(",")) {
 474                         String chapterWord = Instance.getConfig().getStringX(
 475                                         Config.CHAPTER, lang);
 476                         if (chapterName.startsWith(chapterWord)) {
 477                                 chapterName = chapterName.substring(chapterWord.length())
 478                                                 .trim();
 479                                 break;
 480                         }
 481                 }
 482
 483                 if (chapterName.startsWith(Integer.toString(number))) {
 484                         chapterName = chapterName.substring(
 485                                         Integer.toString(number).length()).trim();
 486                 }
 487
 488                 if (chapterName.startsWith(":")) {
 489                         chapterName = chapterName.substring(1).trim();
 490                 }
 491                 //
 492
 493                 Chapter chap = new Chapter(number, chapterName);
 494
 495                 if (content == null) {
 496                         return chap;
 497                 }
 498
 499                 if (isHtml()) {
 500                         // Special <HR> processing:
 501                         content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
 502                                         "\n* * *\n");
 503                 }
 504
 505                 InputStream in = new ByteArrayInputStream(content.getBytes("UTF-8"));
 506                 try {
 507                         @SuppressWarnings("resource")
 508                         Scanner scan = new Scanner(in, "UTF-8");
 509                         scan.useDelimiter("(\\n|</p>)"); // \n for test, </p> for html
 510
 511                         List<Paragraph> paras = new ArrayList<Paragraph>();
 512                         while (scan.hasNext()) {
 513                                 String line = scan.next().trim();
 514                                 boolean image = false;
 515                                 if (line.startsWith("[") && line.endsWith("]")) {
 516                                         URL url = getImageUrl(source,
 517                                                         line.substring(1, line.length() - 1).trim());
 518                                         if (url != null) {
 519                                                 paras.add(new Paragraph(url));
 520                                                 image = true;
 521                                         }
 522                                 }
 523
 524                                 if (!image) {
 525                                         paras.add(processPara(line));
 526                                 }
 527                         }
 528
 529                         // Check quotes for "bad" format
 530                         List<Paragraph> newParas = new ArrayList<Paragraph>();
 531                         for (Paragraph para : paras) {
 532                                 newParas.addAll(requotify(para));
 533                         }
 534                         paras = newParas;
 535
 536                         // Remove double blanks/brks
 537                         boolean space = false;
 538                         boolean brk = true;
 539                         for (int i = 0; i < paras.size(); i++) {
 540                                 Paragraph para = paras.get(i);
 541                                 boolean thisSpace = para.getType() == ParagraphType.BLANK;
 542                                 boolean thisBrk = para.getType() == ParagraphType.BREAK;
 543
 544                                 if (space && thisBrk) {
 545                                         paras.remove(i - 1);
 546                                         i--;
 547                                 } else if ((space || brk) && (thisSpace || thisBrk)) {
 548                                         paras.remove(i);
 549                                         i--;
 550                                 }
 551
 552                                 space = thisSpace;
 553                                 brk = thisBrk;
 554                         }
 555
 556                         // Remove blank/brk at start
 557                         if (paras.size() > 0
 558                                         && (paras.get(0).getType() == ParagraphType.BLANK || paras
 559                                                         .get(0).getType() == ParagraphType.BREAK)) {
 560                                 paras.remove(0);
 561                         }
 562
 563                         // Remove blank/brk at end
 564                         int last = paras.size() - 1;
 565                         if (paras.size() > 0
 566                                         && (paras.get(last).getType() == ParagraphType.BLANK || paras
 567                                                         .get(last).getType() == ParagraphType.BREAK)) {
 568                                 paras.remove(last);
 569                         }
 570
 571                         chap.setParagraphs(paras);
 572
 573                         return chap;
 574                 } finally {
 575                         in.close();
 576                 }
 577         }
 578
 579         static BufferedImage getDefaultCover(String subject) {
 580                 if (subject != null && !subject.isEmpty()
 581                                 && Instance.getCoverDir() != null) {
 582                         try {
 583                                 File fileCover = new File(Instance.getCoverDir(), subject);
 584                                 return getImage(fileCover.toURI().toURL(), subject);
 585                         } catch (MalformedURLException e) {
 586                         }
 587                 }
 588
 589                 return null;
 590         }
 591
 592         /**
 593          * Return the list of supported image extensions.
 594          *
 595          * @return the extensions
 596          */
 597         static String[] getImageExt(boolean emptyAllowed) {
 598                 if (emptyAllowed) {
 599                         return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 600                 } else {
 601                         return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 602                 }
 603         }
 604
 605         static BufferedImage getImage(URL source, String line) {
 606                 URL url = getImageUrl(source, line);
 607                 if (url != null) {
 608                         InputStream in = null;
 609                         try {
 610                                 in = Instance.getCache().open(url, getSupport(url), true);
 611                                 return ImageIO.read(in);
 612                         } catch (IOException e) {
 613                         } finally {
 614                                 if (in != null) {
 615                                         try {
 616                                                 in.close();
 617                                         } catch (IOException e) {
 618                                         }
 619                                 }
 620                         }
 621                 }
 622
 623                 return null;
 624         }
 625
 626         /**
 627          * Check if the given resource can be a local image or a remote image, then
 628          * refresh the cache with it if it is.
 629          *
 630          * @param source
 631          *            the story source
 632          * @param line
 633          *            the resource to check
 634          *
 635          * @return the image URL if found, or NULL
 636          *
 637          */
 638         static URL getImageUrl(URL source, String line) {
 639                 URL url = null;
 640
 641                 if (line != null) {
 642                         // try for files
 643                         String path = null;
 644                         if (source != null) {
 645                                 path = new File(source.getFile()).getParent();
 646                                 try {
 647                                         String urlBase = new File(new File(path), line.trim())
 648                                                         .toURI().toURL().toString();
 649                                         for (String ext : getImageExt(true)) {
 650                                                 if (new File(urlBase + ext).exists()) {
 651                                                         url = new File(urlBase + ext).toURI().toURL();
 652                                                 }
 653                                         }
 654                                 } catch (Exception e) {
 655                                         // Nothing to do here
 656                                 }
 657                         }
 658
 659                         if (url == null) {
 660                                 // try for URLs
 661                                 try {
 662                                         for (String ext : getImageExt(true)) {
 663                                                 if (Instance.getCache().check(new URL(line + ext))) {
 664                                                         url = new URL(line + ext);
 665                                                 }
 666                                         }
 667
 668                                         // try out of cache
 669                                         if (url == null) {
 670                                                 for (String ext : getImageExt(true)) {
 671                                                         try {
 672                                                                 url = new URL(line + ext);
 673                                                                 Instance.getCache().refresh(url,
 674                                                                                 getSupport(url), true);
 675                                                                 break;
 676                                                         } catch (IOException e) {
 677                                                                 // no image with this ext
 678                                                                 url = null;
 679                                                         }
 680                                                 }
 681                                         }
 682                                 } catch (MalformedURLException e) {
 683                                         // Not an url
 684                                 }
 685                         }
 686
 687                         // refresh the cached file
 688                         if (url != null) {
 689                                 try {
 690                                         Instance.getCache().refresh(url, getSupport(url), true);
 691                                 } catch (IOException e) {
 692                                         // woops, broken image
 693                                         url = null;
 694                                 }
 695                         }
 696                 }
 697
 698                 return url;
 699         }
 700
 701         protected InputStream reset(InputStream in) {
 702                 try {
 703                         in.reset();
 704                 } catch (IOException e) {
 705                 }
 706                 return in;
 707         }
 708
 709         /**
 710          * Reset then return {@link BasicSupport#in}.
 711          *
 712          * @return {@link BasicSupport#in}
 713          */
 714         protected InputStream getInput() {
 715                 return reset(in);
 716         }
 717
 718         /**
 719          * Fix the author name if it is prefixed with some "by" {@link String}.
 720          *
 721          * @param author
 722          *            the author with a possible prefix
 723          *
 724          * @return the author without prefixes
 725          */
 726         protected String fixAuthor(String author) {
 727                 if (author != null) {
 728                         for (String suffix : new String[] { " ", ":" }) {
 729                                 for (String byString : Instance.getConfig()
 730                                                 .getString(Config.BYS).split(",")) {
 731                                         byString += suffix;
 732                                         if (author.toUpperCase().startsWith(byString.toUpperCase())) {
 733                                                 author = author.substring(byString.length()).trim();
 734                                         }
 735                                 }
 736                         }
 737
 738                         // Special case (without suffix):
 739                         if (author.startsWith("©")) {
 740                                 author = author.substring(1);
 741                         }
 742                 }
 743
 744                 return author;
 745         }
 746
 747         /**
 748          * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
 749          * and requotify them (i.e., separate them into QUOTE paragraphs and other
 750          * paragraphs (quotes or not)).
 751          *
 752          * @param para
 753          *            the paragraph to requotify (not necessaraly a quote)
 754          *
 755          * @return the correctly (or so we hope) quotified paragraphs
 756          */
 757         private List<Paragraph> requotify(Paragraph para) {
 758                 List<Paragraph> newParas = new ArrayList<Paragraph>();
 759
 760                 if (para.getType() == ParagraphType.QUOTE
 761                                 && para.getContent().length() > 2) {
 762                         String line = para.getContent();
 763                         boolean singleQ = line.startsWith("" + openQuote);
 764                         boolean doubleQ = line.startsWith("" + openDoubleQuote);
 765
 766                         if (!singleQ && !doubleQ) {
 767                                 line = openDoubleQuote + line + closeDoubleQuote;
 768                                 newParas.add(new Paragraph(ParagraphType.QUOTE, line));
 769                         } else {
 770                                 char close = singleQ ? closeQuote : closeDoubleQuote;
 771                                 int posClose = line.indexOf(close, 1);
 772                                 int posDot = line.indexOf(".");
 773                                 while (posDot >= 0 && posDot < posClose) {
 774                                         posDot = line.indexOf(".", posDot + 1);
 775                                 }
 776
 777                                 if (posDot >= 0) {
 778                                         String rest = line.substring(posDot + 1).trim();
 779                                         line = line.substring(0, posDot + 1).trim();
 780                                         newParas.add(new Paragraph(ParagraphType.QUOTE, line));
 781                                         if (!rest.isEmpty()) {
 782                                                 newParas.addAll(requotify(processPara(rest)));
 783                                         }
 784                                 } else {
 785                                         newParas.add(para);
 786                                 }
 787                         }
 788                 } else {
 789                         newParas.add(para);
 790                 }
 791
 792                 return newParas;
 793         }
 794
 795         /**
 796          * Process a {@link Paragraph} from a raw line of text.
 797          * <p>
 798          * Will also fix quotes and HTML encoding if needed.
 799          *
 800          * @param line
 801          *            the raw line
 802          *
 803          * @return the processed {@link Paragraph}
 804          */
 805         private Paragraph processPara(String line) {
 806                 line = ifUnhtml(line).trim();
 807
 808                 boolean space = true;
 809                 boolean brk = true;
 810                 boolean quote = false;
 811                 boolean tentativeCloseQuote = false;
 812                 char prev = '\0';
 813                 int dashCount = 0;
 814
 815                 StringBuilder builder = new StringBuilder();
 816                 for (char car : line.toCharArray()) {
 817                         if (car != '-') {
 818                                 if (dashCount > 0) {
 819                                         // dash, ndash and mdash: - – —
 820                                         // currently: always use mdash
 821                                         builder.append(dashCount == 1 ? '-' : '—');
 822                                 }
 823                                 dashCount = 0;
 824                         }
 825
 826                         if (tentativeCloseQuote) {
 827                                 tentativeCloseQuote = false;
 828                                 if ((car >= 'a' && car <= 'z') || (car >= 'A' && car <= 'Z')
 829                                                 || (car >= '0' && car <= '9')) {
 830                                         builder.append("'");
 831                                 } else {
 832                                         builder.append(closeQuote);
 833                                 }
 834                         }
 835
 836                         switch (car) {
 837                         case ' ': // note: unbreakable space
 838                         case ' ':
 839                         case '\t':
 840                         case '\n': // just in case
 841                         case '\r': // just in case
 842                                 builder.append(' ');
 843                                 break;
 844
 845                         case '\'':
 846                                 if (space || (brk && quote)) {
 847                                         quote = true;
 848                                         builder.append(openQuote);
 849                                 } else if (prev == ' ') {
 850                                         builder.append(openQuote);
 851                                 } else {
 852                                         // it is a quote ("I'm off") or a 'quote' ("This
 853                                         // 'good' restaurant"...)
 854                                         tentativeCloseQuote = true;
 855                                 }
 856                                 break;
 857
 858                         case '"':
 859                                 if (space || (brk && quote)) {
 860                                         quote = true;
 861                                         builder.append(openDoubleQuote);
 862                                 } else if (prev == ' ') {
 863                                         builder.append(openDoubleQuote);
 864                                 } else {
 865                                         builder.append(closeDoubleQuote);
 866                                 }
 867                                 break;
 868
 869                         case '-':
 870                                 if (space) {
 871                                         quote = true;
 872                                 } else {
 873                                         dashCount++;
 874                                 }
 875                                 space = false;
 876                                 break;
 877
 878                         case '*':
 879                         case '~':
 880                         case '/':
 881                         case '\\':
 882                         case '<':
 883                         case '>':
 884                         case '=':
 885                         case '+':
 886                         case '_':
 887                         case '–':
 888                         case '—':
 889                                 space = false;
 890                                 builder.append(car);
 891                                 break;
 892
 893                         case '‘':
 894                         case '`':
 895                         case '‹':
 896                         case '﹁':
 897                         case '〈':
 898                         case '「':
 899                                 if (space || (brk && quote)) {
 900                                         quote = true;
 901                                         builder.append(openQuote);
 902                                 } else {
 903                                         builder.append(openQuote);
 904                                 }
 905                                 space = false;
 906                                 brk = false;
 907                                 break;
 908
 909                         case '’':
 910                         case '›':
 911                         case '﹂':
 912                         case '〉':
 913                         case '」':
 914                                 space = false;
 915                                 brk = false;
 916                                 builder.append(closeQuote);
 917                                 break;
 918
 919                         case '«':
 920                         case '“':
 921                         case '﹃':
 922                         case '《':
 923                         case '『':
 924                                 if (space || (brk && quote)) {
 925                                         quote = true;
 926                                         builder.append(openDoubleQuote);
 927                                 } else {
 928                                         builder.append(openDoubleQuote);
 929                                 }
 930                                 space = false;
 931                                 brk = false;
 932                                 break;
 933
 934                         case '»':
 935                         case '”':
 936                         case '﹄':
 937                         case '》':
 938                         case '』':
 939                                 space = false;
 940                                 brk = false;
 941                                 builder.append(closeDoubleQuote);
 942                                 break;
 943
 944                         default:
 945                                 space = false;
 946                                 brk = false;
 947                                 builder.append(car);
 948                                 break;
 949                         }
 950
 951                         prev = car;
 952                 }
 953
 954                 if (tentativeCloseQuote) {
 955                         tentativeCloseQuote = false;
 956                         builder.append(closeQuote);
 957                 }
 958
 959                 line = builder.toString().trim();
 960
 961                 ParagraphType type = ParagraphType.NORMAL;
 962                 if (space) {
 963                         type = ParagraphType.BLANK;
 964                 } else if (brk) {
 965                         type = ParagraphType.BREAK;
 966                 } else if (quote) {
 967                         type = ParagraphType.QUOTE;
 968                 }
 969
 970                 return new Paragraph(type, line);
 971         }
 972
 973         /**
 974          * Remove the HTML from the inpit <b>if</b> {@link BasicSupport#isHtml()} is
 975          * true.
 976          *
 977          * @param input
 978          *            the input
 979          *
 980          * @return the no html version if needed
 981          */
 982         private String ifUnhtml(String input) {
 983                 if (isHtml() && input != null) {
 984                         return StringUtils.unhtml(input);
 985                 }
 986
 987                 return input;
 988         }
 989
 990         /**
 991          * Return a {@link BasicSupport} implementation supporting the given
 992          * resource if possible.
 993          *
 994          * @param url
 995          *            the story resource
 996          *
 997          * @return an implementation that supports it, or NULL
 998          */
 999         public static BasicSupport getSupport(URL url) {
1000                 if (url == null) {
1001                         return null;
1002                 }
1003
1004                 // TEXT and INFO_TEXT always support files (not URLs though)
1005                 for (SupportType type : SupportType.values()) {
1006                         if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1007                                 BasicSupport support = getSupport(type);
1008                                 if (support != null && support.supports(url)) {
1009                                         return support;
1010                                 }
1011                         }
1012                 }
1013
1014                 for (SupportType type : new SupportType[] { SupportType.TEXT,
1015                                 SupportType.INFO_TEXT }) {
1016                         BasicSupport support = getSupport(type);
1017                         if (support != null && support.supports(url)) {
1018                                 return support;
1019                         }
1020                 }
1021
1022                 return null;
1023         }
1024
1025         /**
1026          * Return a {@link BasicSupport} implementation supporting the given type.
1027          *
1028          * @param type
1029          *            the type
1030          *
1031          * @return an implementation that supports it, or NULL
1032          */
1033         public static BasicSupport getSupport(SupportType type) {
1034                 switch (type) {
1035                 case EPUB:
1036                         return new Epub().setType(type);
1037                 case INFO_TEXT:
1038                         return new InfoText().setType(type);
1039                 case FIMFICTION:
1040                         return new Fimfiction().setType(type);
1041                 case FANFICTION:
1042                         return new Fanfiction().setType(type);
1043                 case TEXT:
1044                         return new Text().setType(type);
1045                 case MANGAFOX:
1046                         return new MangaFox().setType(type);
1047                 case E621:
1048                         return new E621().setType(type);
1049                 case CBZ:
1050                         return new Cbz().setType(type);
1051                 }
1052
1053                 return null;
1054         }
1055
1056         /**
1057          * Return the first line from the given input which correspond to the given
1058          * selectors.
1059          *
1060          * @param in
1061          *            the input
1062          * @param needle
1063          *            a string that must be found inside the target line (also
1064          *            supports "^" at start to say "only if it starts with" the
1065          *            needle)
1066          * @param relativeLine
1067          *            the line to return based upon the target line position (-1 =
1068          *            the line before, 0 = the target line...)
1069          *
1070          * @return the line
1071          */
1072         static String getLine(InputStream in, String needle, int relativeLine) {
1073                 return getLine(in, needle, relativeLine, true);
1074         }
1075
1076         /**
1077          * Return a line from the given input which correspond to the given
1078          * selectors.
1079          *
1080          * @param in
1081          *            the input
1082          * @param needle
1083          *            a string that must be found inside the target line (also
1084          *            supports "^" at start to say "only if it starts with" the
1085          *            needle)
1086          * @param relativeLine
1087          *            the line to return based upon the target line position (-1 =
1088          *            the line before, 0 = the target line...)
1089          * @param first
1090          *            takes the first result (as opposed to the last one, which will
1091          *            also always spend the input)
1092          *
1093          * @return the line
1094          */
1095         static String getLine(InputStream in, String needle, int relativeLine,
1096                         boolean first) {
1097                 String rep = null;
1098
1099                 try {
1100                         in.reset();
1101                 } catch (IOException e) {
1102                         Instance.syserr(e);
1103                 }
1104
1105                 List<String> lines = new ArrayList<String>();
1106                 @SuppressWarnings("resource")
1107                 Scanner scan = new Scanner(in, "UTF-8");
1108                 int index = -1;
1109                 scan.useDelimiter("\\n");
1110                 while (scan.hasNext()) {
1111                         lines.add(scan.next());
1112
1113                         if (index == -1) {
1114                                 if (needle.startsWith("^")) {
1115                                         if (lines.get(lines.size() - 1).startsWith(
1116                                                         needle.substring(1))) {
1117                                                 index = lines.size() - 1;
1118                                         }
1119
1120                                 } else {
1121                                         if (lines.get(lines.size() - 1).contains(needle)) {
1122                                                 index = lines.size() - 1;
1123                                         }
1124                                 }
1125                         }
1126
1127                         if (index >= 0 && index + relativeLine < lines.size()) {
1128                                 rep = lines.get(index + relativeLine);
1129                                 if (first) {
1130                                         break;
1131                                 }
1132                         }
1133                 }
1134
1135                 return rep;
1136         }
1137 }