nikiroo/fanfix/supported/BasicSupport.java

   1 package be.nikiroo.fanfix.supported;
   2
import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Scanner;
  18
  19 import be.nikiroo.fanfix.Instance;
  20 import be.nikiroo.fanfix.bundles.Config;
  21 import be.nikiroo.fanfix.bundles.StringId;
  22 import be.nikiroo.fanfix.data.Chapter;
  23 import be.nikiroo.fanfix.data.MetaData;
  24 import be.nikiroo.fanfix.data.Paragraph;
  25 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
  26 import be.nikiroo.fanfix.data.Story;
  27 import be.nikiroo.utils.IOUtils;
  28 import be.nikiroo.utils.Progress;
  29 import be.nikiroo.utils.StringUtils;
  30
  31 /**
 * This class is the base class used by the other support classes. It can be
 * used outside of this package, and has static methods that you can use to get
 * access to the correct support class.
  35  * <p>
  36  * It will be used with 'resources' (usually web pages or files).
  37  *
  38  * @author niki
  39  */
  40 public abstract class BasicSupport {
  41         /**
  42          * The supported input types for which we can get a {@link BasicSupport}
  43          * object.
  44          *
  45          * @author niki
  46          */
  47         public enum SupportType {
  48                 /** EPUB files created with this program */
  49                 EPUB,
  50                 /** Pure text file with some rules */
  51                 TEXT,
  52                 /** TEXT but with associated .info file */
  53                 INFO_TEXT,
  54                 /** My Little Pony fanfictions */
  55                 FIMFICTION,
  56                 /** Fanfictions from a lot of different universes */
  57                 FANFICTION,
  58                 /** Website with lots of Mangas */
  59                 MANGAFOX,
  60                 /** Furry website with comics support */
  61                 E621,
  62                 /** CBZ files */
  63                 CBZ;
  64
  65                 /**
  66                  * A description of this support type (more information than the
  67                  * {@link BasicSupport#getSourceName()}).
  68                  *
  69                  * @return the description
  70                  */
  71                 public String getDesc() {
  72                         String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
  73                                         this.name());
  74
  75                         if (desc == null) {
  76                                 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
  77                         }
  78
  79                         return desc;
  80                 }
  81
  82                 /**
  83                  * The name of this support type (a short version).
  84                  *
  85                  * @return the name
  86                  */
  87                 public String getSourceName() {
  88                         BasicSupport support = BasicSupport.getSupport(this);
  89                         if (support != null) {
  90                                 return support.getSourceName();
  91                         }
  92
  93                         return null;
  94                 }
  95
  96                 @Override
  97                 public String toString() {
  98                         return super.toString().toLowerCase();
  99                 }
 100
 101                 /**
 102                  * Call {@link SupportType#valueOf(String.toUpperCase())}.
 103                  *
 104                  * @param typeName
 105                  *            the possible type name
 106                  *
 107                  * @return NULL or the type
 108                  */
 109                 public static SupportType valueOfUC(String typeName) {
 110                         return SupportType.valueOf(typeName == null ? null : typeName
 111                                         .toUpperCase());
 112                 }
 113
 114                 /**
 115                  * Call {@link SupportType#valueOf(String.toUpperCase())} but return
 116                  * NULL for NULL instead of raising exception.
 117                  *
 118                  * @param typeName
 119                  *            the possible type name
 120                  *
 121                  * @return NULL or the type
 122                  */
 123                 public static SupportType valueOfNullOkUC(String typeName) {
 124                         if (typeName == null) {
 125                                 return null;
 126                         }
 127
 128                         return SupportType.valueOfUC(typeName);
 129                 }
 130
 131                 /**
 132                  * Call {@link SupportType#valueOf(String.toUpperCase())} but return
 133                  * NULL in case of error instead of raising an exception.
 134                  *
 135                  * @param typeName
 136                  *            the possible type name
 137                  *
 138                  * @return NULL or the type
 139                  */
 140                 public static SupportType valueOfAllOkUC(String typeName) {
 141                         try {
 142                                 return SupportType.valueOfUC(typeName);
 143                         } catch (Exception e) {
 144                                 return null;
 145                         }
 146                 }
 147         }
 148
        // The main resource of the story being processed: assigned by
        // processMeta(URL, boolean, boolean) and handed out (after a reset) by
        // getInput()
        private InputStream in;
        // The support type, see getType()/setType(SupportType)
        private SupportType type;
        // The URL currently worked on; process(URL, Progress) sets it NULL again
        // when it is done
        private URL currentReferer; // with only one 'r', as in 'HTTP'...

        // quote chars, read from the translation bundle when the instance is
        // created (presumably used by the quote processing code, which is not
        // visible from here -- to be confirmed)
        private char openQuote = Instance.getTrans().getChar(
                        StringId.OPEN_SINGLE_QUOTE);
        private char closeQuote = Instance.getTrans().getChar(
                        StringId.CLOSE_SINGLE_QUOTE);
        private char openDoubleQuote = Instance.getTrans().getChar(
                        StringId.OPEN_DOUBLE_QUOTE);
        private char closeDoubleQuote = Instance.getTrans().getChar(
                        StringId.CLOSE_DOUBLE_QUOTE);
 162
        /**
         * The name of this support class.
         * <p>
         * This is the value reported by {@link SupportType#getSourceName()} for
         * the type this support is bound to.
         *
         * @return the name
         */
        protected abstract String getSourceName();
 169
        /**
         * Check if the given resource is supported by this {@link BasicSupport}.
         *
         * @param url
         *            the resource to check for (usually a web page or a file)
         *
         * @return TRUE if this support can handle the resource
         */
        protected abstract boolean supports(URL url);
 179
        /**
         * Return TRUE if the support will return HTML encoded content values for
         * the chapters content.
         * <p>
         * When TRUE, {@link BasicSupport#makeParagraphs(URL, String)} converts
         * the <tt>&lt;hr&gt;</tt> tags into "* * *" lines and splits each line on
         * the paragraph and line break tags; when FALSE, each line of content is
         * taken as is.
         *
         * @return TRUE for HTML
         */
        protected abstract boolean isHtml();
 187
        /**
         * Return the metadata of the story.
         * <p>
         * It is called by {@link BasicSupport#processMeta(URL, boolean, boolean)}
         * with the (reset) main resource; if the returned {@link MetaData} has no
         * cover, the default cover of its subject is used instead.
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input (the main resource)
         *
         * @return the metadata (the caller checks it against NULL before using
         *         it for the cover)
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract MetaData getMeta(URL source, InputStream in)
                        throws IOException;
 190
        /**
         * Return the story description.
         * <p>
         * When a description is requested,
         * {@link BasicSupport#processMeta(URL, boolean, boolean)} converts the
         * returned text into a chapter numbered 0 and stores it as the resume of
         * the story metadata.
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input (the main resource)
         *
         * @return the description
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract String getDesc(URL source, InputStream in)
                        throws IOException;
 206
        /**
         * Return the list of chapters (name and resource).
         * <p>
         * Each entry maps the chapter name (the key) to the resource from which
         * its content will be read (the value);
         * {@link BasicSupport#process(URL, Progress)} opens those resources in
         * list order and numbers the chapters from 1.
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input (the main resource)
         *
         * @return the chapters (NULL is accepted by the caller and results in a
         *         story without chapters)
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract List<Entry<String, URL>> getChapters(URL source,
                        InputStream in) throws IOException;
 222
        /**
         * Return the content of the chapter (possibly HTML encoded, if
         * {@link BasicSupport#isHtml()} is TRUE).
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input of this chapter, i.e., the stream that
         *            {@link BasicSupport#process(URL, Progress)} opened on the
         *            resource returned for it by
         *            {@link BasicSupport#getChapters(URL, InputStream)} (not the
         *            main resource of the story)
         * @param number
         *            the chapter number (the first chapter is number 1)
         *
         * @return the content
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract String getChapterContent(URL source, InputStream in,
                        int number) throws IOException;
 241
 242         /**
 243          * Return the list of cookies (values included) that must be used to
 244          * correctly fetch the resources.
 245          * <p>
 246          * You are expected to call the super method implementation if you override
 247          * it.
 248          *
 249          * @return the cookies
 250          */
 251         public Map<String, String> getCookies() {
 252                 return new HashMap<String, String>();
 253         }
 254
 255         /**
 256          * Process the given story resource into a partially filled {@link Story}
 257          * object containing the name and metadata, except for the description.
 258          *
 259          * @param url
 260          *            the story resource
 261          *
 262          * @return the {@link Story}
 263          *
 264          * @throws IOException
 265          *             in case of I/O error
 266          */
 267         public Story processMeta(URL url) throws IOException {
 268                 return processMeta(url, true, false);
 269         }
 270
 271         /**
 272          * Process the given story resource into a partially filled {@link Story}
 273          * object containing the name and metadata.
 274          *
 275          * @param url
 276          *            the story resource
 277          *
 278          * @param close
 279          *            close "this" and "in" when done
 280          *
 281          * @return the {@link Story}
 282          *
 283          * @throws IOException
 284          *             in case of I/O error
 285          */
 286         protected Story processMeta(URL url, boolean close, boolean getDesc)
 287                         throws IOException {
 288                 in = Instance.getCache().open(url, this, false);
 289                 if (in == null) {
 290                         return null;
 291                 }
 292
 293                 try {
 294                         preprocess(url, getInput());
 295
 296                         Story story = new Story();
 297                         MetaData meta = getMeta(url, getInput());
 298                         story.setMeta(meta);
 299
 300                         if (meta != null && meta.getCover() == null) {
 301                                 meta.setCover(getDefaultCover(meta.getSubject()));
 302                         }
 303
 304                         if (getDesc) {
 305                                 String descChapterName = Instance.getTrans().getString(
 306                                                 StringId.DESCRIPTION);
 307                                 story.getMeta().setResume(
 308                                                 makeChapter(url, 0, descChapterName,
 309                                                                 getDesc(url, getInput())));
 310                         }
 311
 312                         return story;
 313                 } finally {
 314                         if (close) {
 315                                 try {
 316                                         close();
 317                                 } catch (IOException e) {
 318                                         Instance.syserr(e);
 319                                 }
 320
 321                                 if (in != null) {
 322                                         in.close();
 323                                 }
 324                         }
 325                 }
 326         }
 327
 328         /**
 329          * Process the given story resource into a fully filled {@link Story}
 330          * object.
 331          *
 332          * @param url
 333          *            the story resource
 334          * @param pg
 335          *            the optional progress reporter
 336          *
 337          * @return the {@link Story}
 338          *
 339          * @throws IOException
 340          *             in case of I/O error
 341          */
 342         public Story process(URL url, Progress pg) throws IOException {
 343                 if (pg == null) {
 344                         pg = new Progress();
 345                 } else {
 346                         pg.setMinMax(0, 100);
 347                 }
 348
 349                 setCurrentReferer(url);
 350
 351                 pg.setProgress(1);
 352                 try {
 353                         Story story = processMeta(url, false, true);
 354                         pg.setProgress(10);
 355                         if (story == null) {
 356                                 pg.setProgress(100);
 357                                 return null;
 358                         }
 359
 360                         story.setChapters(new ArrayList<Chapter>());
 361
 362                         List<Entry<String, URL>> chapters = getChapters(url, getInput());
 363                         pg.setProgress(20);
 364
 365                         int i = 1;
 366                         if (chapters != null) {
 367                                 Progress pgChaps = new Progress(0, chapters.size());
 368                                 pg.addProgress(pgChaps, 80);
 369
 370                                 for (Entry<String, URL> chap : chapters) {
 371                                         setCurrentReferer(chap.getValue());
 372                                         InputStream chapIn = Instance.getCache().open(
 373                                                         chap.getValue(), this, true);
 374                                         try {
 375                                                 story.getChapters().add(
 376                                                                 makeChapter(url, i, chap.getKey(),
 377                                                                                 getChapterContent(url, chapIn, i)));
 378                                         } finally {
 379                                                 chapIn.close();
 380                                         }
 381
 382                                         pgChaps.setProgress(i++);
 383                                 }
 384                         } else {
 385                                 pg.setProgress(100);
 386                         }
 387
 388                         return story;
 389
 390                 } finally {
 391                         try {
 392                                 close();
 393                         } catch (IOException e) {
 394                                 Instance.syserr(e);
 395                         }
 396
 397                         if (in != null) {
 398                                 in.close();
 399                         }
 400
 401                         currentReferer = null;
 402                 }
 403         }
 404
        /**
         * The support type.
         *
         * @return the type (as given to {@link BasicSupport#setType(SupportType)})
         */
        public SupportType getType() {
                return type;
        }
 413
        /**
         * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
         * the current {@link URL} we work on.
         * <p>
         * It is NULL once {@link BasicSupport#process(URL, Progress)} is done.
         *
         * @return the referer
         */
        public URL getCurrentReferer() {
                return currentReferer;
        }
 423
        /**
         * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
         * the current {@link URL} we work on.
         * <p>
         * {@link BasicSupport#process(URL, Progress)} sets it to the story
         * resource first, then to each chapter resource in turn.
         *
         * @param currentReferer
         *            the new referer
         */
        protected void setCurrentReferer(URL currentReferer) {
                this.currentReferer = currentReferer;
        }
 434
        /**
         * The support type.
         *
         * @param type
         *            the new type
         *
         * @return this (so the call can be chained)
         */
        protected BasicSupport setType(SupportType type) {
                this.type = type;
                return this;
        }
 447
        /**
         * Prepare the support if needed before processing.
         * <p>
         * It is called by {@link BasicSupport#processMeta(URL, boolean, boolean)}
         * right after the main resource has been opened and before the metadata
         * are read. This default implementation does nothing.
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input (the main resource)
         *
         * @throws IOException
         *             on I/O error
         */
        protected void preprocess(URL source, InputStream in) throws IOException {
                // nothing to prepare by default
        }
 461
        /**
         * Now that we have processed the {@link Story}, close the resources if any.
         * <p>
         * The callers in this class report an {@link IOException} thrown from
         * here via <tt>Instance.syserr</tt> instead of propagating it. This
         * default implementation does nothing.
         *
         * @throws IOException
         *             on I/O error
         */
        protected void close() throws IOException {
                // no resource to release by default
        }
 470
 471         /**
 472          * Create a {@link Chapter} object from the given information, formatting
 473          * the content as it should be.
 474          *
 475          * @param number
 476          *            the chapter number
 477          * @param name
 478          *            the chapter name
 479          * @param content
 480          *            the chapter content
 481          *
 482          * @return the {@link Chapter}
 483          *
 484          * @throws IOException
 485          *             in case of I/O error
 486          */
 487         protected Chapter makeChapter(URL source, int number, String name,
 488                         String content) throws IOException {
 489                 // Chapter name: process it correctly, then remove the possible
 490                 // redundant "Chapter x: " in front of it
 491                 String chapterName = processPara(name).getContent().trim();
 492                 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
 493                                 .split(",")) {
 494                         String chapterWord = Instance.getConfig().getStringX(
 495                                         Config.CHAPTER, lang);
 496                         if (chapterName.startsWith(chapterWord)) {
 497                                 chapterName = chapterName.substring(chapterWord.length())
 498                                                 .trim();
 499                                 break;
 500                         }
 501                 }
 502
 503                 if (chapterName.startsWith(Integer.toString(number))) {
 504                         chapterName = chapterName.substring(
 505                                         Integer.toString(number).length()).trim();
 506                 }
 507
 508                 if (chapterName.startsWith(":")) {
 509                         chapterName = chapterName.substring(1).trim();
 510                 }
 511                 //
 512
 513                 Chapter chap = new Chapter(number, chapterName);
 514
 515                 if (content != null) {
 516                         chap.setParagraphs(makeParagraphs(source, content));
 517                 }
 518
 519                 return chap;
 520
 521         }
 522
 523         /**
 524          * Convert the given content into {@link Paragraph}s.
 525          *
 526          * @param source
 527          *            the source URL of the story
 528          * @param content
 529          *            the textual content
 530          *
 531          * @return the {@link Paragraph}s
 532          *
 533          * @throws IOException
 534          *             in case of I/O error
 535          */
 536         protected List<Paragraph> makeParagraphs(URL source, String content)
 537                         throws IOException {
 538                 if (isHtml()) {
 539                         // Special <HR> processing:
 540                         content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
 541                                         "\n* * *\n");
 542                 }
 543
 544                 List<Paragraph> paras = new ArrayList<Paragraph>();
 545                 InputStream in = new ByteArrayInputStream(content.getBytes("UTF-8"));
 546                 try {
 547                         BufferedReader buff = new BufferedReader(new InputStreamReader(in,
 548                                         "UTF-8"));
 549
 550                         for (String encodedLine = buff.readLine(); encodedLine != null; encodedLine = buff
 551                                         .readLine()) {
 552                                 String lines[];
 553                                 if (isHtml()) {
 554                                         lines = encodedLine.split("(<p>|</p>|<br>|<br/>|\\n)");
 555                                 } else {
 556                                         lines = new String[] { encodedLine };
 557                                 }
 558
 559                                 for (String aline : lines) {
 560                                         String line = aline.trim();
 561
 562                                         URL image = null;
 563                                         if (line.startsWith("[") && line.endsWith("]")) {
 564                                                 image = getImageUrl(this, source,
 565                                                                 line.substring(1, line.length() - 1).trim());
 566                                         }
 567
 568                                         if (image != null) {
 569                                                 paras.add(new Paragraph(image));
 570                                         } else {
 571                                                 paras.add(processPara(line));
 572                                         }
 573                                 }
 574                         }
 575                 } finally {
 576                         in.close();
 577                 }
 578
 579                 // Check quotes for "bad" format
 580                 List<Paragraph> newParas = new ArrayList<Paragraph>();
 581                 for (Paragraph para : paras) {
 582                         newParas.addAll(requotify(para));
 583                 }
 584                 paras = newParas;
 585
 586                 // Remove double blanks/brks
 587                 fixBlanksBreaks(paras);
 588
 589                 return paras;
 590         }
 591
 592         /**
 593          * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
 594          * those {@link Paragraph}s.
 595          * <p>
 596          * The resulting list will not contain a starting or trailing blank/break
 597          * nor 2 blanks or breaks following each other.
 598          *
 599          * @param paras
 600          *            the list of {@link Paragraph}s to fix
 601          */
 602         protected void fixBlanksBreaks(List<Paragraph> paras) {
 603                 boolean space = false;
 604                 boolean brk = true;
 605                 for (int i = 0; i < paras.size(); i++) {
 606                         Paragraph para = paras.get(i);
 607                         boolean thisSpace = para.getType() == ParagraphType.BLANK;
 608                         boolean thisBrk = para.getType() == ParagraphType.BREAK;
 609
 610                         if (i > 0 && space && thisBrk) {
 611                                 paras.remove(i - 1);
 612                                 i--;
 613                         } else if ((space || brk) && (thisSpace || thisBrk)) {
 614                                 paras.remove(i);
 615                                 i--;
 616                         }
 617
 618                         space = thisSpace;
 619                         brk = thisBrk;
 620                 }
 621
 622                 // Remove blank/brk at start
 623                 if (paras.size() > 0
 624                                 && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
 625                                                 0).getType() == ParagraphType.BREAK)) {
 626                         paras.remove(0);
 627                 }
 628
 629                 // Remove blank/brk at end
 630                 int last = paras.size() - 1;
 631                 if (paras.size() > 0
 632                                 && (paras.get(last).getType() == ParagraphType.BLANK || paras
 633                                                 .get(last).getType() == ParagraphType.BREAK)) {
 634                         paras.remove(last);
 635                 }
 636         }
 637
 638         /**
 639          * Get the default cover related to this subject (see <tt>.info</tt> files).
 640          *
 641          * @param subject
 642          *            the subject
 643          *
 644          * @return the cover if any, or NULL
 645          */
 646         static BufferedImage getDefaultCover(String subject) {
 647                 if (subject != null && !subject.isEmpty()
 648                                 && Instance.getCoverDir() != null) {
 649                         try {
 650                                 File fileCover = new File(Instance.getCoverDir(), subject);
 651                                 return getImage(null, fileCover.toURI().toURL(), subject);
 652                         } catch (MalformedURLException e) {
 653                         }
 654                 }
 655
 656                 return null;
 657         }
 658
 659         /**
 660          * Return the list of supported image extensions.
 661          *
 662          * @return the extensions
 663          */
 664         static String[] getImageExt(boolean emptyAllowed) {
 665                 if (emptyAllowed) {
 666                         return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 667                 } else {
 668                         return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 669                 }
 670         }
 671
 672         static BufferedImage getImage(BasicSupport support, URL source, String line) {
 673                 URL url = getImageUrl(support, source, line);
 674                 if (url != null) {
 675                         InputStream in = null;
 676                         try {
 677                                 in = Instance.getCache().open(url, getSupport(url), true);
 678                                 return IOUtils.toImage(in);
 679                         } catch (IOException e) {
 680                         } finally {
 681                                 if (in != null) {
 682                                         try {
 683                                                 in.close();
 684                                         } catch (IOException e) {
 685                                         }
 686                                 }
 687                         }
 688                 }
 689
 690                 return null;
 691         }
 692
 693         /**
 694          * Check if the given resource can be a local image or a remote image, then
 695          * refresh the cache with it if it is.
 696          *
 697          * @param source
 698          *            the story source
 699          * @param line
 700          *            the resource to check
 701          *
 702          * @return the image URL if found, or NULL
 703          *
 704          */
 705         static URL getImageUrl(BasicSupport support, URL source, String line) {
 706                 URL url = null;
 707
 708                 if (line != null) {
 709                         // try for files
 710                         String path = null;
 711                         if (source != null) {
 712                                 path = new File(source.getFile()).getParent();
 713                                 try {
 714                                         String basePath = new File(new File(path), line.trim())
 715                                                         .getAbsolutePath();
 716                                         for (String ext : getImageExt(true)) {
 717                                                 if (new File(basePath + ext).exists()) {
 718                                                         url = new File(basePath + ext).toURI().toURL();
 719                                                 }
 720                                         }
 721                                 } catch (Exception e) {
 722                                         // Nothing to do here
 723                                 }
 724                         }
 725
 726                         if (url == null) {
 727                                 // try for URLs
 728                                 try {
 729                                         for (String ext : getImageExt(true)) {
 730                                                 if (Instance.getCache().check(new URL(line + ext))) {
 731                                                         url = new URL(line + ext);
 732                                                         break;
 733                                                 }
 734                                         }
 735
 736                                         // try out of cache
 737                                         if (url == null) {
 738                                                 for (String ext : getImageExt(true)) {
 739                                                         try {
 740                                                                 url = new URL(line + ext);
 741                                                                 Instance.getCache().refresh(url, support, true);
 742                                                                 break;
 743                                                         } catch (IOException e) {
 744                                                                 // no image with this ext
 745                                                                 url = null;
 746                                                         }
 747                                                 }
 748                                         }
 749                                 } catch (MalformedURLException e) {
 750                                         // Not an url
 751                                 }
 752                         }
 753
 754                         // refresh the cached file
 755                         if (url != null) {
 756                                 try {
 757                                         Instance.getCache().refresh(url, support, true);
 758                                 } catch (IOException e) {
 759                                         // woops, broken image
 760                                         url = null;
 761                                 }
 762                         }
 763                 }
 764
 765                 return url;
 766         }
 767
 768         protected InputStream reset(InputStream in) {
 769                 try {
 770                         in.reset();
 771                 } catch (IOException e) {
 772                 }
 773                 return in;
 774         }
 775
 776         /**
 777          * Reset then return {@link BasicSupport#in}.
 778          *
 779          * @return {@link BasicSupport#in}
 780          */
 781         protected InputStream getInput() {
 782                 return reset(in);
 783         }
 784
 785         /**
 786          * Fix the author name if it is prefixed with some "by" {@link String}.
 787          *
 788          * @param author
 789          *            the author with a possible prefix
 790          *
 791          * @return the author without prefixes
 792          */
 793         protected String fixAuthor(String author) {
 794                 if (author != null) {
 795                         for (String suffix : new String[] { " ", ":" }) {
 796                                 for (String byString : Instance.getConfig()
 797                                                 .getString(Config.BYS).split(",")) {
 798                                         byString += suffix;
 799                                         if (author.toUpperCase().startsWith(byString.toUpperCase())) {
 800                                                 author = author.substring(byString.length()).trim();
 801                                         }
 802                                 }
 803                         }
 804
 805                         // Special case (without suffix):
 806                         if (author.startsWith("©")) {
 807                                 author = author.substring(1);
 808                         }
 809                 }
 810
 811                 return author;
 812         }
 813
 814         /**
 815          * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
 816          * and requotify them (i.e., separate them into QUOTE paragraphs and other
 817          * paragraphs (quotes or not)).
 818          *
 819          * @param para
 820          *            the paragraph to requotify (not necessaraly a quote)
 821          *
 822          * @return the correctly (or so we hope) quotified paragraphs
 823          */
 824         protected List<Paragraph> requotify(Paragraph para) {
 825                 List<Paragraph> newParas = new ArrayList<Paragraph>();
 826
 827                 if (para.getType() == ParagraphType.QUOTE
 828                                 && para.getContent().length() > 2) {
 829                         String line = para.getContent();
 830                         boolean singleQ = line.startsWith("" + openQuote);
 831                         boolean doubleQ = line.startsWith("" + openDoubleQuote);
 832
 833                         // Do not try when more than one quote at a time
 834                         // (some stories are not easily readable if we do)
 835                         if (singleQ
 836                                         && line.indexOf(closeQuote, 1) < line
 837                                                         .lastIndexOf(closeQuote)) {
 838                                 newParas.add(para);
 839                                 return newParas;
 840                         }
 841                         if (doubleQ
 842                                         && line.indexOf(closeDoubleQuote, 1) < line
 843                                                         .lastIndexOf(closeDoubleQuote)) {
 844                                 newParas.add(para);
 845                                 return newParas;
 846                         }
 847                         //
 848
 849                         if (!singleQ && !doubleQ) {
 850                                 line = openDoubleQuote + line + closeDoubleQuote;
 851                                 newParas.add(new Paragraph(ParagraphType.QUOTE, line));
 852                         } else {
 853                                 char open = singleQ ? openQuote : openDoubleQuote;
 854                                 char close = singleQ ? closeQuote : closeDoubleQuote;
 855
 856                                 int posDot = -1;
 857                                 boolean inQuote = false;
 858                                 int i = 0;
 859                                 for (char car : line.toCharArray()) {
 860                                         if (car == open) {
 861                                                 inQuote = true;
 862                                         } else if (car == close) {
 863                                                 inQuote = false;
 864                                         } else if (car == '.' && !inQuote) {
 865                                                 posDot = i;
 866                                                 break;
 867                                         }
 868                                         i++;
 869                                 }
 870
 871                                 if (posDot >= 0) {
 872                                         String rest = line.substring(posDot + 1).trim();
 873                                         line = line.substring(0, posDot + 1).trim();
 874                                         newParas.add(new Paragraph(ParagraphType.QUOTE, line));
 875                                         if (!rest.isEmpty()) {
 876                                                 newParas.addAll(requotify(processPara(rest)));
 877                                         }
 878                                 } else {
 879                                         newParas.add(para);
 880                                 }
 881                         }
 882                 } else {
 883                         newParas.add(para);
 884                 }
 885
 886                 return newParas;
 887         }
 888
 889         /**
 890          * Process a {@link Paragraph} from a raw line of text.
 891          * <p>
 892          * Will also fix quotes and HTML encoding if needed.
 893          *
 894          * @param line
 895          *            the raw line
 896          *
 897          * @return the processed {@link Paragraph}
 898          */
 899         private Paragraph processPara(String line) {
 900                 line = ifUnhtml(line).trim();
 901
 902                 boolean space = true;
 903                 boolean brk = true;
 904                 boolean quote = false;
 905                 boolean tentativeCloseQuote = false;
 906                 char prev = '\0';
 907                 int dashCount = 0;
 908
 909                 StringBuilder builder = new StringBuilder();
 910                 for (char car : line.toCharArray()) {
 911                         if (car != '-') {
 912                                 if (dashCount > 0) {
 913                                         // dash, ndash and mdash: - – —
 914                                         // currently: always use mdash
 915                                         builder.append(dashCount == 1 ? '-' : '—');
 916                                 }
 917                                 dashCount = 0;
 918                         }
 919
 920                         if (tentativeCloseQuote) {
 921                                 tentativeCloseQuote = false;
 922                                 if ((car >= 'a' && car <= 'z') || (car >= 'A' && car <= 'Z')
 923                                                 || (car >= '0' && car <= '9')) {
 924                                         builder.append("'");
 925                                 } else {
 926                                         builder.append(closeQuote);
 927                                 }
 928                         }
 929
 930                         switch (car) {
 931                         case ' ': // note: unbreakable space
 932                         case ' ':
 933                         case '\t':
 934                         case '\n': // just in case
 935                         case '\r': // just in case
 936                                 builder.append(' ');
 937                                 break;
 938
 939                         case '\'':
 940                                 if (space || (brk && quote)) {
 941                                         quote = true;
 942                                         builder.append(openQuote);
 943                                 } else if (prev == ' ') {
 944                                         builder.append(openQuote);
 945                                 } else {
 946                                         // it is a quote ("I'm off") or a 'quote' ("This
 947                                         // 'good' restaurant"...)
 948                                         tentativeCloseQuote = true;
 949                                 }
 950                                 break;
 951
 952                         case '"':
 953                                 if (space || (brk && quote)) {
 954                                         quote = true;
 955                                         builder.append(openDoubleQuote);
 956                                 } else if (prev == ' ') {
 957                                         builder.append(openDoubleQuote);
 958                                 } else {
 959                                         builder.append(closeDoubleQuote);
 960                                 }
 961                                 break;
 962
 963                         case '-':
 964                                 if (space) {
 965                                         quote = true;
 966                                 } else {
 967                                         dashCount++;
 968                                 }
 969                                 space = false;
 970                                 break;
 971
 972                         case '*':
 973                         case '~':
 974                         case '/':
 975                         case '\\':
 976                         case '<':
 977                         case '>':
 978                         case '=':
 979                         case '+':
 980                         case '_':
 981                         case '–':
 982                         case '—':
 983                                 space = false;
 984                                 builder.append(car);
 985                                 break;
 986
 987                         case '‘':
 988                         case '`':
 989                         case '‹':
 990                         case '﹁':
 991                         case '〈':
 992                         case '「':
 993                                 if (space || (brk && quote)) {
 994                                         quote = true;
 995                                         builder.append(openQuote);
 996                                 } else {
 997                                         builder.append(openQuote);
 998                                 }
 999                                 space = false;
1000                                 brk = false;
1001                                 break;
1002
1003                         case '’':
1004                         case '›':
1005                         case '﹂':
1006                         case '〉':
1007                         case '」':
1008                                 space = false;
1009                                 brk = false;
1010                                 builder.append(closeQuote);
1011                                 break;
1012
1013                         case '«':
1014                         case '“':
1015                         case '﹃':
1016                         case '《':
1017                         case '『':
1018                                 if (space || (brk && quote)) {
1019                                         quote = true;
1020                                         builder.append(openDoubleQuote);
1021                                 } else {
1022                                         builder.append(openDoubleQuote);
1023                                 }
1024                                 space = false;
1025                                 brk = false;
1026                                 break;
1027
1028                         case '»':
1029                         case '”':
1030                         case '﹄':
1031                         case '》':
1032                         case '』':
1033                                 space = false;
1034                                 brk = false;
1035                                 builder.append(closeDoubleQuote);
1036                                 break;
1037
1038                         default:
1039                                 space = false;
1040                                 brk = false;
1041                                 builder.append(car);
1042                                 break;
1043                         }
1044
1045                         prev = car;
1046                 }
1047
1048                 if (tentativeCloseQuote) {
1049                         tentativeCloseQuote = false;
1050                         builder.append(closeQuote);
1051                 }
1052
1053                 line = builder.toString().trim();
1054
1055                 ParagraphType type = ParagraphType.NORMAL;
1056                 if (space) {
1057                         type = ParagraphType.BLANK;
1058                 } else if (brk) {
1059                         type = ParagraphType.BREAK;
1060                 } else if (quote) {
1061                         type = ParagraphType.QUOTE;
1062                 }
1063
1064                 return new Paragraph(type, line);
1065         }
1066
1067         /**
1068          * Remove the HTML from the inpit <b>if</b> {@link BasicSupport#isHtml()} is
1069          * true.
1070          *
1071          * @param input
1072          *            the input
1073          *
1074          * @return the no html version if needed
1075          */
1076         private String ifUnhtml(String input) {
1077                 if (isHtml() && input != null) {
1078                         return StringUtils.unhtml(input);
1079                 }
1080
1081                 return input;
1082         }
1083
1084         /**
1085          * Return a {@link BasicSupport} implementation supporting the given
1086          * resource if possible.
1087          *
1088          * @param url
1089          *            the story resource
1090          *
1091          * @return an implementation that supports it, or NULL
1092          */
1093         public static BasicSupport getSupport(URL url) {
1094                 if (url == null) {
1095                         return null;
1096                 }
1097
1098                 // TEXT and INFO_TEXT always support files (not URLs though)
1099                 for (SupportType type : SupportType.values()) {
1100                         if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1101                                 BasicSupport support = getSupport(type);
1102                                 if (support != null && support.supports(url)) {
1103                                         return support;
1104                                 }
1105                         }
1106                 }
1107
1108                 for (SupportType type : new SupportType[] { SupportType.TEXT,
1109                                 SupportType.INFO_TEXT }) {
1110                         BasicSupport support = getSupport(type);
1111                         if (support != null && support.supports(url)) {
1112                                 return support;
1113                         }
1114                 }
1115
1116                 return null;
1117         }
1118
1119         /**
1120          * Return a {@link BasicSupport} implementation supporting the given type.
1121          *
1122          * @param type
1123          *            the type
1124          *
1125          * @return an implementation that supports it, or NULL
1126          */
1127         public static BasicSupport getSupport(SupportType type) {
1128                 switch (type) {
1129                 case EPUB:
1130                         return new Epub().setType(type);
1131                 case INFO_TEXT:
1132                         return new InfoText().setType(type);
1133                 case FIMFICTION:
1134                         return new Fimfiction().setType(type);
1135                 case FANFICTION:
1136                         return new Fanfiction().setType(type);
1137                 case TEXT:
1138                         return new Text().setType(type);
1139                 case MANGAFOX:
1140                         return new MangaFox().setType(type);
1141                 case E621:
1142                         return new E621().setType(type);
1143                 case CBZ:
1144                         return new Cbz().setType(type);
1145                 }
1146
1147                 return null;
1148         }
1149
1150         /**
1151          * Return the first line from the given input which correspond to the given
1152          * selectors.
1153          *
1154          * @param in
1155          *            the input
1156          * @param needle
1157          *            a string that must be found inside the target line (also
1158          *            supports "^" at start to say "only if it starts with" the
1159          *            needle)
1160          * @param relativeLine
1161          *            the line to return based upon the target line position (-1 =
1162          *            the line before, 0 = the target line...)
1163          *
1164          * @return the line
1165          */
1166         static String getLine(InputStream in, String needle, int relativeLine) {
1167                 return getLine(in, needle, relativeLine, true);
1168         }
1169
1170         /**
1171          * Return a line from the given input which correspond to the given
1172          * selectors.
1173          *
1174          * @param in
1175          *            the input
1176          * @param needle
1177          *            a string that must be found inside the target line (also
1178          *            supports "^" at start to say "only if it starts with" the
1179          *            needle)
1180          * @param relativeLine
1181          *            the line to return based upon the target line position (-1 =
1182          *            the line before, 0 = the target line...)
1183          * @param first
1184          *            takes the first result (as opposed to the last one, which will
1185          *            also always spend the input)
1186          *
1187          * @return the line
1188          */
1189         static String getLine(InputStream in, String needle, int relativeLine,
1190                         boolean first) {
1191                 String rep = null;
1192
1193                 try {
1194                         in.reset();
1195                 } catch (IOException e) {
1196                         Instance.syserr(e);
1197                 }
1198
1199                 List<String> lines = new ArrayList<String>();
1200                 @SuppressWarnings("resource")
1201                 Scanner scan = new Scanner(in, "UTF-8");
1202                 int index = -1;
1203                 scan.useDelimiter("\\n");
1204                 while (scan.hasNext()) {
1205                         lines.add(scan.next());
1206
1207                         if (index == -1) {
1208                                 if (needle.startsWith("^")) {
1209                                         if (lines.get(lines.size() - 1).startsWith(
1210                                                         needle.substring(1))) {
1211                                                 index = lines.size() - 1;
1212                                         }
1213
1214                                 } else {
1215                                         if (lines.get(lines.size() - 1).contains(needle)) {
1216                                                 index = lines.size() - 1;
1217                                         }
1218                                 }
1219                         }
1220
1221                         if (index >= 0 && index + relativeLine < lines.size()) {
1222                                 rep = lines.get(index + relativeLine);
1223                                 if (first) {
1224                                         break;
1225                                 }
1226                         }
1227                 }
1228
1229                 return rep;
1230         }
1231 }