src/be/nikiroo/fanfix/supported/BasicSupport.java

   1 package be.nikiroo.fanfix.supported;
   2
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Scanner;

import be.nikiroo.fanfix.Instance;
import be.nikiroo.fanfix.bundles.Config;
import be.nikiroo.fanfix.bundles.StringId;
import be.nikiroo.fanfix.data.Chapter;
import be.nikiroo.fanfix.data.MetaData;
import be.nikiroo.fanfix.data.Paragraph;
import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
import be.nikiroo.fanfix.data.Story;
import be.nikiroo.utils.IOUtils;
import be.nikiroo.utils.StringUtils;
  27
  28 /**
 * This class is the base class used by the other support classes. It can be
 * used outside of this package, and has static methods that you can use to
 * get access to the correct support class.
  32  * <p>
  33  * It will be used with 'resources' (usually web pages or files).
  34  *
  35  * @author niki
  36  */
  37 public abstract class BasicSupport {
  38         /**
  39          * The supported input types for which we can get a {@link BasicSupport}
  40          * object.
  41          *
  42          * @author niki
  43          */
  44         public enum SupportType {
  45                 /** EPUB files created with this program */
  46                 EPUB,
  47                 /** Pure text file with some rules */
  48                 TEXT,
  49                 /** TEXT but with associated .info file */
  50                 INFO_TEXT,
  51                 /** My Little Pony fanfictions */
  52                 FIMFICTION,
  53                 /** Fanfictions from a lot of different universes */
  54                 FANFICTION,
  55                 /** Website with lots of Mangas */
  56                 MANGAFOX,
  57                 /** Furry website with comics support */
  58                 E621,
  59                 /** CBZ files */
  60                 CBZ;
  61
  62                 /**
  63                  * A description of this support type (more information than the
  64                  * {@link BasicSupport#getSourceName()}).
  65                  *
  66                  * @return the description
  67                  */
  68                 public String getDesc() {
  69                         String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
  70                                         this.name());
  71
  72                         if (desc == null) {
  73                                 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
  74                         }
  75
  76                         return desc;
  77                 }
  78
  79                 /**
  80                  * The name of this support type (a short version).
  81                  *
  82                  * @return the name
  83                  */
  84                 public String getSourceName() {
  85                         BasicSupport support = BasicSupport.getSupport(this);
  86                         if (support != null) {
  87                                 return support.getSourceName();
  88                         }
  89
  90                         return null;
  91                 }
  92
  93                 @Override
  94                 public String toString() {
  95                         return super.toString().toLowerCase();
  96                 }
  97
  98                 /**
  99                  * Call {@link SupportType#valueOf(String.toUpperCase())}.
 100                  *
 101                  * @param typeName
 102                  *            the possible type name
 103                  *
 104                  * @return NULL or the type
 105                  */
 106                 public static SupportType valueOfUC(String typeName) {
 107                         return SupportType.valueOf(typeName == null ? null : typeName
 108                                         .toUpperCase());
 109                 }
 110
 111                 /**
 112                  * Call {@link SupportType#valueOf(String.toUpperCase())} but return
 113                  * NULL for NULL instead of raising exception.
 114                  *
 115                  * @param typeName
 116                  *            the possible type name
 117                  *
 118                  * @return NULL or the type
 119                  */
 120                 public static SupportType valueOfNullOkUC(String typeName) {
 121                         if (typeName == null) {
 122                                 return null;
 123                         }
 124
 125                         return SupportType.valueOfUC(typeName);
 126                 }
 127
 128                 /**
 129                  * Call {@link SupportType#valueOf(String.toUpperCase())} but return
 130                  * NULL in case of error instead of raising an exception.
 131                  *
 132                  * @param typeName
 133                  *            the possible type name
 134                  *
 135                  * @return NULL or the type
 136                  */
 137                 public static SupportType valueOfAllOkUC(String typeName) {
 138                         try {
 139                                 return SupportType.valueOfUC(typeName);
 140                         } catch (Exception e) {
 141                                 return null;
 142                         }
 143                 }
 144         }
 145
        // the main resource, opened by processMeta(...) and handed out (after a
        // reset) by getInput()
        private InputStream in;
        private SupportType type;
        private URL currentReferer; // with one 'r', as in the HTTP header

        // quote chars (taken from the translation bundle)
        private char openQuote = Instance.getTrans().getChar(
                        StringId.OPEN_SINGLE_QUOTE);
        private char closeQuote = Instance.getTrans().getChar(
                        StringId.CLOSE_SINGLE_QUOTE);
        private char openDoubleQuote = Instance.getTrans().getChar(
                        StringId.OPEN_DOUBLE_QUOTE);
        private char closeDoubleQuote = Instance.getTrans().getChar(
                        StringId.CLOSE_DOUBLE_QUOTE);
 159
        /**
         * The name of this support class.
         * <p>
         * This is the value returned by {@link SupportType#getSourceName()} for
         * the corresponding type.
         *
         * @return the name
         */
        protected abstract String getSourceName();
 166
        /**
         * Check if the given resource is supported by this {@link BasicSupport}.
         *
         * @param url
         *            the resource to check for
         *
         * @return TRUE if this support can handle the resource
         */
        protected abstract boolean supports(URL url);
 176
        /**
         * Return TRUE if the support will return HTML encoded content values for
         * the chapters content.
         * <p>
         * When TRUE, the HR tags found in the content are converted into break
         * lines when the chapters are built.
         *
         * @return TRUE for HTML
         */
        protected abstract boolean isHtml();
 184
        /**
         * Return the metadata of the story.
         * <p>
         * It is called on the (reset) main resource once the support has been
         * given the chance to preprocess it.
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input (the main resource)
         *
         * @return the metadata (the caller checks it against NULL before
         *         setting a default cover, so NULL is presumably allowed -- to be
         *         confirmed against the implementations)
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract MetaData getMeta(URL source, InputStream in)
                        throws IOException;
 187
        /**
         * Return the story description.
         * <p>
         * The returned content is turned into a chapter (number 0) and stored as
         * the resume of the story metadata.
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input (the main resource)
         *
         * @return the description
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract String getDesc(URL source, InputStream in)
                        throws IOException;
 203
        /**
         * Return the list of chapters (name and resource).
         * <p>
         * Each resource is opened in turn, in list order, and the chapters are
         * numbered from 1 in that same order.
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input (the main resource)
         *
         * @return the chapters (NULL is accepted and means "no chapters")
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract List<Entry<String, URL>> getChapters(URL source,
                        InputStream in) throws IOException;
 219
        /**
         * Return the content of the chapter (possibly HTML encoded, if
         * {@link BasicSupport#isHtml()} is TRUE).
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input (the resource of this chapter, i.e., the stream
         *            opened on the chapter {@link URL} given by
         *            {@link BasicSupport#getChapters(URL, InputStream)} -- not the
         *            main resource)
         * @param number
         *            the chapter number (starting at 1)
         *
         * @return the content
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract String getChapterContent(URL source, InputStream in,
                        int number) throws IOException;
 238
 239         /**
 240          * Return the list of cookies (values included) that must be used to
 241          * correctly fetch the resources.
 242          * <p>
 243          * You are expected to call the super method implementation if you override
 244          * it.
 245          *
 246          * @return the cookies
 247          */
 248         public Map<String, String> getCookies() {
 249                 return new HashMap<String, String>();
 250         }
 251
 252         /**
 253          * Process the given story resource into a partially filled {@link Story}
 254          * object containing the name and metadata, except for the description.
 255          *
 256          * @param url
 257          *            the story resource
 258          *
 259          * @return the {@link Story}
 260          *
 261          * @throws IOException
 262          *             in case of I/O error
 263          */
 264         public Story processMeta(URL url) throws IOException {
 265                 return processMeta(url, true, false);
 266         }
 267
 268         /**
 269          * Process the given story resource into a partially filled {@link Story}
 270          * object containing the name and metadata.
 271          *
 272          * @param url
 273          *            the story resource
 274          *
 275          * @param close
 276          *            close "this" and "in" when done
 277          *
 278          * @return the {@link Story}
 279          *
 280          * @throws IOException
 281          *             in case of I/O error
 282          */
 283         protected Story processMeta(URL url, boolean close, boolean getDesc)
 284                         throws IOException {
 285                 in = Instance.getCache().open(url, this, false);
 286                 if (in == null) {
 287                         return null;
 288                 }
 289
 290                 try {
 291                         preprocess(url, getInput());
 292
 293                         Story story = new Story();
 294                         MetaData meta = getMeta(url, getInput());
 295                         story.setMeta(meta);
 296
 297                         if (meta != null && meta.getCover() == null) {
 298                                 meta.setCover(getDefaultCover(meta.getSubject()));
 299                         }
 300
 301                         if (getDesc) {
 302                                 String descChapterName = Instance.getTrans().getString(
 303                                                 StringId.DESCRIPTION);
 304                                 story.getMeta().setResume(
 305                                                 makeChapter(url, 0, descChapterName,
 306                                                                 getDesc(url, getInput())));
 307                         }
 308
 309                         return story;
 310                 } finally {
 311                         if (close) {
 312                                 try {
 313                                         close();
 314                                 } catch (IOException e) {
 315                                         Instance.syserr(e);
 316                                 }
 317
 318                                 if (in != null) {
 319                                         in.close();
 320                                 }
 321                         }
 322                 }
 323         }
 324
 325         /**
 326          * Process the given story resource into a fully filled {@link Story}
 327          * object.
 328          *
 329          * @param url
 330          *            the story resource
 331          *
 332          * @return the {@link Story}
 333          *
 334          * @throws IOException
 335          *             in case of I/O error
 336          */
 337         public Story process(URL url) throws IOException {
 338                 setCurrentReferer(url);
 339
 340                 try {
 341                         Story story = processMeta(url, false, true);
 342                         if (story == null) {
 343                                 return null;
 344                         }
 345
 346                         story.setChapters(new ArrayList<Chapter>());
 347
 348                         List<Entry<String, URL>> chapters = getChapters(url, getInput());
 349                         int i = 1;
 350                         if (chapters != null) {
 351                                 for (Entry<String, URL> chap : chapters) {
 352                                         setCurrentReferer(chap.getValue());
 353                                         InputStream chapIn = Instance.getCache().open(
 354                                                         chap.getValue(), this, true);
 355                                         try {
 356                                                 story.getChapters().add(
 357                                                                 makeChapter(url, i, chap.getKey(),
 358                                                                                 getChapterContent(url, chapIn, i)));
 359                                         } finally {
 360                                                 chapIn.close();
 361                                         }
 362
 363                                         i++;
 364                                 }
 365                         }
 366
 367                         return story;
 368
 369                 } finally {
 370                         try {
 371                                 close();
 372                         } catch (IOException e) {
 373                                 Instance.syserr(e);
 374                         }
 375
 376                         if (in != null) {
 377                                 in.close();
 378                         }
 379
 380                         currentReferer = null;
 381                 }
 382         }
 383
 384         /**
 385          * The support type.$
 386          *
 387          * @return the type
 388          */
 389         public SupportType getType() {
 390                 return type;
 391         }
 392
 393         /**
 394          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 395          * the current {@link URL} we work on.
 396          *
 397          * @return the referer
 398          */
 399         public URL getCurrentReferer() {
 400                 return currentReferer;
 401         }
 402
 403         /**
 404          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 405          * the current {@link URL} we work on.
 406          *
 407          * @param currentReferer
 408          *            the new referer
 409          */
 410         protected void setCurrentReferer(URL currentReferer) {
 411                 this.currentReferer = currentReferer;
 412         }
 413
 414         /**
 415          * The support type.
 416          *
 417          * @param type
 418          *            the new type
 419          *
 420          * @return this
 421          */
 422         protected BasicSupport setType(SupportType type) {
 423                 this.type = type;
 424                 return this;
 425         }
 426
 427         /**
 428          * Prepare the support if needed before processing.
 429          *
 430          * @param source
 431          *            the source of the story
 432          * @param in
 433          *            the input (the main resource)
 434          *
 435          * @throws IOException
 436          *             on I/O error
 437          */
 438         protected void preprocess(URL source, InputStream in) throws IOException {
 439         }
 440
 441         /**
 442          * Now that we have processed the {@link Story}, close the resources if any.
 443          *
 444          * @throws IOException
 445          *             on I/O error
 446          */
 447         protected void close() throws IOException {
 448         }
 449
 450         /**
 451          * Create a {@link Chapter} object from the given information, formatting
 452          * the content as it should be.
 453          *
 454          * @param number
 455          *            the chapter number
 456          * @param name
 457          *            the chapter name
 458          * @param content
 459          *            the chapter content
 460          *
 461          * @return the {@link Chapter}
 462          *
 463          * @throws IOException
 464          *             in case of I/O error
 465          */
 466         protected Chapter makeChapter(URL source, int number, String name,
 467                         String content) throws IOException {
 468
 469                 // Chapter name: process it correctly, then remove the possible
 470                 // redundant "Chapter x: " in front of it
 471                 String chapterName = processPara(name).getContent().trim();
 472                 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
 473                                 .split(",")) {
 474                         String chapterWord = Instance.getConfig().getStringX(
 475                                         Config.CHAPTER, lang);
 476                         if (chapterName.startsWith(chapterWord)) {
 477                                 chapterName = chapterName.substring(chapterWord.length())
 478                                                 .trim();
 479                                 break;
 480                         }
 481                 }
 482
 483                 if (chapterName.startsWith(Integer.toString(number))) {
 484                         chapterName = chapterName.substring(
 485                                         Integer.toString(number).length()).trim();
 486                 }
 487
 488                 if (chapterName.startsWith(":")) {
 489                         chapterName = chapterName.substring(1).trim();
 490                 }
 491                 //
 492
 493                 Chapter chap = new Chapter(number, chapterName);
 494
 495                 if (content == null) {
 496                         return chap;
 497                 }
 498
 499                 if (isHtml()) {
 500                         // Special <HR> processing:
 501                         content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
 502                                         "\n* * *\n");
 503                 }
 504
 505                 InputStream in = new ByteArrayInputStream(content.getBytes("UTF-8"));
 506                 try {
 507                         @SuppressWarnings("resource")
 508                         Scanner scan = new Scanner(in, "UTF-8");
 509                         scan.useDelimiter("(\\n|</p>)"); // \n for test, </p> for html
 510
 511                         List<Paragraph> paras = new ArrayList<Paragraph>();
 512                         while (scan.hasNext()) {
 513                                 String line = scan.next().trim();
 514                                 boolean image = false;
 515                                 if (line.startsWith("[") && line.endsWith("]")) {
 516                                         URL url = getImageUrl(this, source,
 517                                                         line.substring(1, line.length() - 1).trim());
 518                                         if (url != null) {
 519                                                 paras.add(new Paragraph(url));
 520                                                 image = true;
 521                                         }
 522                                 }
 523
 524                                 if (!image) {
 525                                         paras.add(processPara(line));
 526                                 }
 527                         }
 528
 529                         // Check quotes for "bad" format
 530                         List<Paragraph> newParas = new ArrayList<Paragraph>();
 531                         for (Paragraph para : paras) {
 532                                 newParas.addAll(requotify(para));
 533                         }
 534                         paras = newParas;
 535
 536                         // Remove double blanks/brks
 537                         boolean space = false;
 538                         boolean brk = true;
 539                         for (int i = 0; i < paras.size(); i++) {
 540                                 Paragraph para = paras.get(i);
 541                                 boolean thisSpace = para.getType() == ParagraphType.BLANK;
 542                                 boolean thisBrk = para.getType() == ParagraphType.BREAK;
 543
 544                                 if (space && thisBrk) {
 545                                         paras.remove(i - 1);
 546                                         i--;
 547                                 } else if ((space || brk) && (thisSpace || thisBrk)) {
 548                                         paras.remove(i);
 549                                         i--;
 550                                 }
 551
 552                                 space = thisSpace;
 553                                 brk = thisBrk;
 554                         }
 555
 556                         // Remove blank/brk at start
 557                         if (paras.size() > 0
 558                                         && (paras.get(0).getType() == ParagraphType.BLANK || paras
 559                                                         .get(0).getType() == ParagraphType.BREAK)) {
 560                                 paras.remove(0);
 561                         }
 562
 563                         // Remove blank/brk at end
 564                         int last = paras.size() - 1;
 565                         if (paras.size() > 0
 566                                         && (paras.get(last).getType() == ParagraphType.BLANK || paras
 567                                                         .get(last).getType() == ParagraphType.BREAK)) {
 568                                 paras.remove(last);
 569                         }
 570
 571                         chap.setParagraphs(paras);
 572
 573                         return chap;
 574                 } finally {
 575                         in.close();
 576                 }
 577         }
 578
 579         static BufferedImage getDefaultCover(String subject) {
 580                 if (subject != null && !subject.isEmpty()
 581                                 && Instance.getCoverDir() != null) {
 582                         try {
 583                                 File fileCover = new File(Instance.getCoverDir(), subject);
 584                                 return getImage(null, fileCover.toURI().toURL(), subject);
 585                         } catch (MalformedURLException e) {
 586                         }
 587                 }
 588
 589                 return null;
 590         }
 591
 592         /**
 593          * Return the list of supported image extensions.
 594          *
 595          * @return the extensions
 596          */
 597         static String[] getImageExt(boolean emptyAllowed) {
 598                 if (emptyAllowed) {
 599                         return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 600                 } else {
 601                         return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 602                 }
 603         }
 604
 605         static BufferedImage getImage(BasicSupport support, URL source, String line) {
 606                 URL url = getImageUrl(support, source, line);
 607                 if (url != null) {
 608                         InputStream in = null;
 609                         try {
 610                                 in = Instance.getCache().open(url, getSupport(url), true);
 611                                 return IOUtils.toImage(in);
 612                         } catch (IOException e) {
 613                         } finally {
 614                                 if (in != null) {
 615                                         try {
 616                                                 in.close();
 617                                         } catch (IOException e) {
 618                                         }
 619                                 }
 620                         }
 621                 }
 622
 623                 return null;
 624         }
 625
 626         /**
 627          * Check if the given resource can be a local image or a remote image, then
 628          * refresh the cache with it if it is.
 629          *
 630          * @param source
 631          *            the story source
 632          * @param line
 633          *            the resource to check
 634          *
 635          * @return the image URL if found, or NULL
 636          *
 637          */
 638         static URL getImageUrl(BasicSupport support, URL source, String line) {
 639                 URL url = null;
 640
 641                 if (line != null) {
 642                         // try for files
 643                         String path = null;
 644                         if (source != null) {
 645                                 path = new File(source.getFile()).getParent();
 646                                 try {
 647                                         String basePath = new File(new File(path), line.trim())
 648                                                         .getAbsolutePath();
 649                                         for (String ext : getImageExt(true)) {
 650                                                 if (new File(basePath + ext).exists()) {
 651                                                         url = new File(basePath + ext).toURI().toURL();
 652                                                 }
 653                                         }
 654                                 } catch (Exception e) {
 655                                         // Nothing to do here
 656                                 }
 657                         }
 658
 659                         if (url == null) {
 660                                 // try for URLs
 661                                 try {
 662                                         for (String ext : getImageExt(true)) {
 663                                                 if (Instance.getCache().check(new URL(line + ext))) {
 664                                                         url = new URL(line + ext);
 665                                                         break;
 666                                                 }
 667                                         }
 668
 669                                         // try out of cache
 670                                         if (url == null) {
 671                                                 for (String ext : getImageExt(true)) {
 672                                                         try {
 673                                                                 url = new URL(line + ext);
 674                                                                 Instance.getCache().refresh(url, support, true);
 675                                                                 break;
 676                                                         } catch (IOException e) {
 677                                                                 // no image with this ext
 678                                                                 url = null;
 679                                                         }
 680                                                 }
 681                                         }
 682                                 } catch (MalformedURLException e) {
 683                                         // Not an url
 684                                 }
 685                         }
 686
 687                         // refresh the cached file
 688                         if (url != null) {
 689                                 try {
 690                                         Instance.getCache().refresh(url, support, true);
 691                                 } catch (IOException e) {
 692                                         // woops, broken image
 693                                         url = null;
 694                                 }
 695                         }
 696                 }
 697
 698                 return url;
 699         }
 700
 701         protected InputStream reset(InputStream in) {
 702                 try {
 703                         in.reset();
 704                 } catch (IOException e) {
 705                 }
 706                 return in;
 707         }
 708
 709         /**
 710          * Reset then return {@link BasicSupport#in}.
 711          *
 712          * @return {@link BasicSupport#in}
 713          */
 714         protected InputStream getInput() {
 715                 return reset(in);
 716         }
 717
 718         /**
 719          * Fix the author name if it is prefixed with some "by" {@link String}.
 720          *
 721          * @param author
 722          *            the author with a possible prefix
 723          *
 724          * @return the author without prefixes
 725          */
 726         protected String fixAuthor(String author) {
 727                 if (author != null) {
 728                         for (String suffix : new String[] { " ", ":" }) {
 729                                 for (String byString : Instance.getConfig()
 730                                                 .getString(Config.BYS).split(",")) {
 731                                         byString += suffix;
 732                                         if (author.toUpperCase().startsWith(byString.toUpperCase())) {
 733                                                 author = author.substring(byString.length()).trim();
 734                                         }
 735                                 }
 736                         }
 737
 738                         // Special case (without suffix):
 739                         if (author.startsWith("©")) {
 740                                 author = author.substring(1);
 741                         }
 742                 }
 743
 744                 return author;
 745         }
 746
 747         /**
 748          * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
 749          * and requotify them (i.e., separate them into QUOTE paragraphs and other
 750          * paragraphs (quotes or not)).
 751          *
 752          * @param para
 753          *            the paragraph to requotify (not necessaraly a quote)
 754          *
 755          * @return the correctly (or so we hope) quotified paragraphs
 756          */
 757         private List<Paragraph> requotify(Paragraph para) {
 758                 List<Paragraph> newParas = new ArrayList<Paragraph>();
 759
 760                 if (para.getType() == ParagraphType.QUOTE
 761                                 && para.getContent().length() > 2) {
 762                         String line = para.getContent();
 763                         boolean singleQ = line.startsWith("" + openQuote);
 764                         boolean doubleQ = line.startsWith("" + openDoubleQuote);
 765
 766                         // Do not try when more than one quote at a time
 767                         // (some stories are not easily readable if we do)
 768                         if (singleQ
 769                                         && line.indexOf(closeQuote, 1) < line
 770                                                         .lastIndexOf(closeQuote)) {
 771                                 newParas.add(para);
 772                                 return newParas;
 773                         }
 774                         if (doubleQ
 775                                         && line.indexOf(closeDoubleQuote, 1) < line
 776                                                         .lastIndexOf(closeDoubleQuote)) {
 777                                 newParas.add(para);
 778                                 return newParas;
 779                         }
 780                         //
 781
 782                         if (!singleQ && !doubleQ) {
 783                                 line = openDoubleQuote + line + closeDoubleQuote;
 784                                 newParas.add(new Paragraph(ParagraphType.QUOTE, line));
 785                         } else {
 786                                 char open = singleQ ? openQuote : openDoubleQuote;
 787                                 char close = singleQ ? closeQuote : closeDoubleQuote;
 788
 789                                 int posDot = -1;
 790                                 boolean inQuote = false;
 791                                 int i = 0;
 792                                 for (char car : line.toCharArray()) {
 793                                         if (car == open) {
 794                                                 inQuote = true;
 795                                         } else if (car == close) {
 796                                                 inQuote = false;
 797                                         } else if (car == '.' && !inQuote) {
 798                                                 posDot = i;
 799                                                 break;
 800                                         }
 801                                         i++;
 802                                 }
 803
 804                                 if (posDot >= 0) {
 805                                         String rest = line.substring(posDot + 1).trim();
 806                                         line = line.substring(0, posDot + 1).trim();
 807                                         newParas.add(new Paragraph(ParagraphType.QUOTE, line));
 808                                         if (!rest.isEmpty()) {
 809                                                 newParas.addAll(requotify(processPara(rest)));
 810                                         }
 811                                 } else {
 812                                         newParas.add(para);
 813                                 }
 814                         }
 815                 } else {
 816                         newParas.add(para);
 817                 }
 818
 819                 return newParas;
 820         }
 821
 822         /**
 823          * Process a {@link Paragraph} from a raw line of text.
 824          * <p>
 825          * Will also fix quotes and HTML encoding if needed.
 826          *
 827          * @param line
 828          *            the raw line
 829          *
 830          * @return the processed {@link Paragraph}
 831          */
 832         private Paragraph processPara(String line) {
 833                 line = ifUnhtml(line).trim();
 834
 835                 boolean space = true;
 836                 boolean brk = true;
 837                 boolean quote = false;
 838                 boolean tentativeCloseQuote = false;
 839                 char prev = '\0';
 840                 int dashCount = 0;
 841
 842                 StringBuilder builder = new StringBuilder();
 843                 for (char car : line.toCharArray()) {
 844                         if (car != '-') {
 845                                 if (dashCount > 0) {
 846                                         // dash, ndash and mdash: - – —
 847                                         // currently: always use mdash
 848                                         builder.append(dashCount == 1 ? '-' : '—');
 849                                 }
 850                                 dashCount = 0;
 851                         }
 852
 853                         if (tentativeCloseQuote) {
 854                                 tentativeCloseQuote = false;
 855                                 if ((car >= 'a' && car <= 'z') || (car >= 'A' && car <= 'Z')
 856                                                 || (car >= '0' && car <= '9')) {
 857                                         builder.append("'");
 858                                 } else {
 859                                         builder.append(closeQuote);
 860                                 }
 861                         }
 862
 863                         switch (car) {
 864                         case ' ': // note: unbreakable space
 865                         case ' ':
 866                         case '\t':
 867                         case '\n': // just in case
 868                         case '\r': // just in case
 869                                 builder.append(' ');
 870                                 break;
 871
 872                         case '\'':
 873                                 if (space || (brk && quote)) {
 874                                         quote = true;
 875                                         builder.append(openQuote);
 876                                 } else if (prev == ' ') {
 877                                         builder.append(openQuote);
 878                                 } else {
 879                                         // it is a quote ("I'm off") or a 'quote' ("This
 880                                         // 'good' restaurant"...)
 881                                         tentativeCloseQuote = true;
 882                                 }
 883                                 break;
 884
 885                         case '"':
 886                                 if (space || (brk && quote)) {
 887                                         quote = true;
 888                                         builder.append(openDoubleQuote);
 889                                 } else if (prev == ' ') {
 890                                         builder.append(openDoubleQuote);
 891                                 } else {
 892                                         builder.append(closeDoubleQuote);
 893                                 }
 894                                 break;
 895
 896                         case '-':
 897                                 if (space) {
 898                                         quote = true;
 899                                 } else {
 900                                         dashCount++;
 901                                 }
 902                                 space = false;
 903                                 break;
 904
 905                         case '*':
 906                         case '~':
 907                         case '/':
 908                         case '\\':
 909                         case '<':
 910                         case '>':
 911                         case '=':
 912                         case '+':
 913                         case '_':
 914                         case '–':
 915                         case '—':
 916                                 space = false;
 917                                 builder.append(car);
 918                                 break;
 919
 920                         case '‘':
 921                         case '`':
 922                         case '‹':
 923                         case '﹁':
 924                         case '〈':
 925                         case '「':
 926                                 if (space || (brk && quote)) {
 927                                         quote = true;
 928                                         builder.append(openQuote);
 929                                 } else {
 930                                         builder.append(openQuote);
 931                                 }
 932                                 space = false;
 933                                 brk = false;
 934                                 break;
 935
 936                         case '’':
 937                         case '›':
 938                         case '﹂':
 939                         case '〉':
 940                         case '」':
 941                                 space = false;
 942                                 brk = false;
 943                                 builder.append(closeQuote);
 944                                 break;
 945
 946                         case '«':
 947                         case '“':
 948                         case '﹃':
 949                         case '《':
 950                         case '『':
 951                                 if (space || (brk && quote)) {
 952                                         quote = true;
 953                                         builder.append(openDoubleQuote);
 954                                 } else {
 955                                         builder.append(openDoubleQuote);
 956                                 }
 957                                 space = false;
 958                                 brk = false;
 959                                 break;
 960
 961                         case '»':
 962                         case '”':
 963                         case '﹄':
 964                         case '》':
 965                         case '』':
 966                                 space = false;
 967                                 brk = false;
 968                                 builder.append(closeDoubleQuote);
 969                                 break;
 970
 971                         default:
 972                                 space = false;
 973                                 brk = false;
 974                                 builder.append(car);
 975                                 break;
 976                         }
 977
 978                         prev = car;
 979                 }
 980
 981                 if (tentativeCloseQuote) {
 982                         tentativeCloseQuote = false;
 983                         builder.append(closeQuote);
 984                 }
 985
 986                 line = builder.toString().trim();
 987
 988                 ParagraphType type = ParagraphType.NORMAL;
 989                 if (space) {
 990                         type = ParagraphType.BLANK;
 991                 } else if (brk) {
 992                         type = ParagraphType.BREAK;
 993                 } else if (quote) {
 994                         type = ParagraphType.QUOTE;
 995                 }
 996
 997                 return new Paragraph(type, line);
 998         }
 999
1000         /**
1001          * Remove the HTML from the inpit <b>if</b> {@link BasicSupport#isHtml()} is
1002          * true.
1003          *
1004          * @param input
1005          *            the input
1006          *
1007          * @return the no html version if needed
1008          */
1009         private String ifUnhtml(String input) {
1010                 if (isHtml() && input != null) {
1011                         return StringUtils.unhtml(input);
1012                 }
1013
1014                 return input;
1015         }
1016
1017         /**
1018          * Return a {@link BasicSupport} implementation supporting the given
1019          * resource if possible.
1020          *
1021          * @param url
1022          *            the story resource
1023          *
1024          * @return an implementation that supports it, or NULL
1025          */
1026         public static BasicSupport getSupport(URL url) {
1027                 if (url == null) {
1028                         return null;
1029                 }
1030
1031                 // TEXT and INFO_TEXT always support files (not URLs though)
1032                 for (SupportType type : SupportType.values()) {
1033                         if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1034                                 BasicSupport support = getSupport(type);
1035                                 if (support != null && support.supports(url)) {
1036                                         return support;
1037                                 }
1038                         }
1039                 }
1040
1041                 for (SupportType type : new SupportType[] { SupportType.TEXT,
1042                                 SupportType.INFO_TEXT }) {
1043                         BasicSupport support = getSupport(type);
1044                         if (support != null && support.supports(url)) {
1045                                 return support;
1046                         }
1047                 }
1048
1049                 return null;
1050         }
1051
1052         /**
1053          * Return a {@link BasicSupport} implementation supporting the given type.
1054          *
1055          * @param type
1056          *            the type
1057          *
1058          * @return an implementation that supports it, or NULL
1059          */
1060         public static BasicSupport getSupport(SupportType type) {
1061                 switch (type) {
1062                 case EPUB:
1063                         return new Epub().setType(type);
1064                 case INFO_TEXT:
1065                         return new InfoText().setType(type);
1066                 case FIMFICTION:
1067                         return new Fimfiction().setType(type);
1068                 case FANFICTION:
1069                         return new Fanfiction().setType(type);
1070                 case TEXT:
1071                         return new Text().setType(type);
1072                 case MANGAFOX:
1073                         return new MangaFox().setType(type);
1074                 case E621:
1075                         return new E621().setType(type);
1076                 case CBZ:
1077                         return new Cbz().setType(type);
1078                 }
1079
1080                 return null;
1081         }
1082
1083         /**
1084          * Return the first line from the given input which correspond to the given
1085          * selectors.
1086          *
1087          * @param in
1088          *            the input
1089          * @param needle
1090          *            a string that must be found inside the target line (also
1091          *            supports "^" at start to say "only if it starts with" the
1092          *            needle)
1093          * @param relativeLine
1094          *            the line to return based upon the target line position (-1 =
1095          *            the line before, 0 = the target line...)
1096          *
1097          * @return the line
1098          */
1099         static String getLine(InputStream in, String needle, int relativeLine) {
1100                 return getLine(in, needle, relativeLine, true);
1101         }
1102
1103         /**
1104          * Return a line from the given input which correspond to the given
1105          * selectors.
1106          *
1107          * @param in
1108          *            the input
1109          * @param needle
1110          *            a string that must be found inside the target line (also
1111          *            supports "^" at start to say "only if it starts with" the
1112          *            needle)
1113          * @param relativeLine
1114          *            the line to return based upon the target line position (-1 =
1115          *            the line before, 0 = the target line...)
1116          * @param first
1117          *            takes the first result (as opposed to the last one, which will
1118          *            also always spend the input)
1119          *
1120          * @return the line
1121          */
1122         static String getLine(InputStream in, String needle, int relativeLine,
1123                         boolean first) {
1124                 String rep = null;
1125
1126                 try {
1127                         in.reset();
1128                 } catch (IOException e) {
1129                         Instance.syserr(e);
1130                 }
1131
1132                 List<String> lines = new ArrayList<String>();
1133                 @SuppressWarnings("resource")
1134                 Scanner scan = new Scanner(in, "UTF-8");
1135                 int index = -1;
1136                 scan.useDelimiter("\\n");
1137                 while (scan.hasNext()) {
1138                         lines.add(scan.next());
1139
1140                         if (index == -1) {
1141                                 if (needle.startsWith("^")) {
1142                                         if (lines.get(lines.size() - 1).startsWith(
1143                                                         needle.substring(1))) {
1144                                                 index = lines.size() - 1;
1145                                         }
1146
1147                                 } else {
1148                                         if (lines.get(lines.size() - 1).contains(needle)) {
1149                                                 index = lines.size() - 1;
1150                                         }
1151                                 }
1152                         }
1153
1154                         if (index >= 0 && index + relativeLine < lines.size()) {
1155                                 rep = lines.get(index + relativeLine);
1156                                 if (first) {
1157                                         break;
1158                                 }
1159                         }
1160                 }
1161
1162                 return rep;
1163         }
1164 }