src/be/nikiroo/fanfix/supported/BasicSupport.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.awt.image.BufferedImage;
   4 import java.io.BufferedReader;
   5 import java.io.ByteArrayInputStream;
   6 import java.io.File;
   7 import java.io.IOException;
   8 import java.io.InputStream;
   9 import java.io.InputStreamReader;
  10 import java.net.MalformedURLException;
  11 import java.net.URL;
  12 import java.util.ArrayList;
  13 import java.util.HashMap;
  14 import java.util.List;
  15 import java.util.Map;
  16 import java.util.Map.Entry;
  17 import java.util.Scanner;
  18
  19 import be.nikiroo.fanfix.Instance;
  20 import be.nikiroo.fanfix.bundles.Config;
  21 import be.nikiroo.fanfix.bundles.StringId;
  22 import be.nikiroo.fanfix.data.Chapter;
  23 import be.nikiroo.fanfix.data.MetaData;
  24 import be.nikiroo.fanfix.data.Paragraph;
  25 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
  26 import be.nikiroo.fanfix.data.Story;
  27 import be.nikiroo.utils.IOUtils;
  28 import be.nikiroo.utils.Progress;
  29 import be.nikiroo.utils.StringUtils;
  30
  31 /**
 * This class is the base class used by the other support classes. It can be
 * used outside of this package, and has static methods that you can use to
 * get access to the correct support class.
  35  * <p>
  36  * It will be used with 'resources' (usually web pages or files).
  37  *
  38  * @author niki
  39  */
  40 public abstract class BasicSupport {
  41         /**
  42          * The supported input types for which we can get a {@link BasicSupport}
  43          * object.
  44          *
  45          * @author niki
  46          */
  47         public enum SupportType {
  48                 /** EPUB files created with this program */
  49                 EPUB,
  50                 /** Pure text file with some rules */
  51                 TEXT,
  52                 /** TEXT but with associated .info file */
  53                 INFO_TEXT,
  54                 /** My Little Pony fanfictions */
  55                 FIMFICTION,
  56                 /** Fanfictions from a lot of different universes */
  57                 FANFICTION,
  58                 /** Website with lots of Mangas */
  59                 MANGAFOX,
  60                 /** Furry website with comics support */
  61                 E621,
  62                 /** Furry website with stories */
  63                 YIFFSTAR,
  64                 /** CBZ files */
  65                 CBZ,
  66                 /** HTML files */
  67                 HTML;
  68
  69                 /**
  70                  * A description of this support type (more information than the
  71                  * {@link BasicSupport#getSourceName()}).
  72                  *
  73                  * @return the description
  74                  */
  75                 public String getDesc() {
  76                         String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
  77                                         this.name());
  78
  79                         if (desc == null) {
  80                                 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
  81                         }
  82
  83                         return desc;
  84                 }
  85
  86                 /**
  87                  * The name of this support type (a short version).
  88                  *
  89                  * @return the name
  90                  */
  91                 public String getSourceName() {
  92                         BasicSupport support = BasicSupport.getSupport(this);
  93                         if (support != null) {
  94                                 return support.getSourceName();
  95                         }
  96
  97                         return null;
  98                 }
  99
 100                 @Override
 101                 public String toString() {
 102                         return super.toString().toLowerCase();
 103                 }
 104
 105                 /**
 106                  * Call {@link SupportType#valueOf(String.toUpperCase())}.
 107                  *
 108                  * @param typeName
 109                  *            the possible type name
 110                  *
 111                  * @return NULL or the type
 112                  */
 113                 public static SupportType valueOfUC(String typeName) {
 114                         return SupportType.valueOf(typeName == null ? null : typeName
 115                                         .toUpperCase());
 116                 }
 117
 118                 /**
 119                  * Call {@link SupportType#valueOf(String.toUpperCase())} but return
 120                  * NULL for NULL instead of raising exception.
 121                  *
 122                  * @param typeName
 123                  *            the possible type name
 124                  *
 125                  * @return NULL or the type
 126                  */
 127                 public static SupportType valueOfNullOkUC(String typeName) {
 128                         if (typeName == null) {
 129                                 return null;
 130                         }
 131
 132                         return SupportType.valueOfUC(typeName);
 133                 }
 134
 135                 /**
 136                  * Call {@link SupportType#valueOf(String.toUpperCase())} but return
 137                  * NULL in case of error instead of raising an exception.
 138                  *
 139                  * @param typeName
 140                  *            the possible type name
 141                  *
 142                  * @return NULL or the type
 143                  */
 144                 public static SupportType valueOfAllOkUC(String typeName) {
 145                         try {
 146                                 return SupportType.valueOfUC(typeName);
 147                         } catch (Exception e) {
 148                                 return null;
 149                         }
 150                 }
 151         }
 152
 153         private InputStream in;
 154         private SupportType type;
 155         private URL currentReferer; // with only one 'r', as in 'HTTP'...
 156
 157         // quote chars
 158         private char openQuote = Instance.getTrans().getChar(
 159                         StringId.OPEN_SINGLE_QUOTE);
 160         private char closeQuote = Instance.getTrans().getChar(
 161                         StringId.CLOSE_SINGLE_QUOTE);
 162         private char openDoubleQuote = Instance.getTrans().getChar(
 163                         StringId.OPEN_DOUBLE_QUOTE);
 164         private char closeDoubleQuote = Instance.getTrans().getChar(
 165                         StringId.CLOSE_DOUBLE_QUOTE);
 166
 167         /**
 168          * The name of this support class.
 169          *
 170          * @return the name
 171          */
 172         protected abstract String getSourceName();
 173
 174         /**
 175          * Check if the given resource is supported by this {@link BasicSupport}.
 176          *
 177          * @param url
 178          *            the resource to check for
 179          *
 180          * @return TRUE if it is
 181          */
 182         protected abstract boolean supports(URL url);
 183
 184         /**
 185          * Return TRUE if the support will return HTML encoded content values for
 186          * the chapters content.
 187          *
 188          * @return TRUE for HTML
 189          */
 190         protected abstract boolean isHtml();
 191
 192         protected abstract MetaData getMeta(URL source, InputStream in)
 193                         throws IOException;
 194
 195         /**
 196          * Return the story description.
 197          *
 198          * @param source
 199          *            the source of the story
 200          * @param in
 201          *            the input (the main resource)
 202          *
 203          * @return the description
 204          *
 205          * @throws IOException
 206          *             in case of I/O error
 207          */
 208         protected abstract String getDesc(URL source, InputStream in)
 209                         throws IOException;
 210
 211         /**
 212          * Return the list of chapters (name and resource).
 213          *
 214          * @param source
 215          *            the source of the story
 216          * @param in
 217          *            the input (the main resource)
 218          *
 219          * @return the chapters
 220          *
 221          * @throws IOException
 222          *             in case of I/O error
 223          */
 224         protected abstract List<Entry<String, URL>> getChapters(URL source,
 225                         InputStream in) throws IOException;
 226
 227         /**
 228          * Return the content of the chapter (possibly HTML encoded, if
 229          * {@link BasicSupport#isHtml()} is TRUE).
 230          *
 231          * @param source
 232          *            the source of the story
 233          * @param in
 234          *            the input (the main resource)
 235          * @param number
 236          *            the chapter number
 237          *
 238          * @return the content
 239          *
 240          * @throws IOException
 241          *             in case of I/O error
 242          */
 243         protected abstract String getChapterContent(URL source, InputStream in,
 244                         int number) throws IOException;
 245
 246         /**
 247          * Return the list of cookies (values included) that must be used to
 248          * correctly fetch the resources.
 249          * <p>
 250          * You are expected to call the super method implementation if you override
 251          * it.
 252          *
 253          * @return the cookies
 254          */
 255         public Map<String, String> getCookies() {
 256                 return new HashMap<String, String>();
 257         }
 258
 259         /**
 260          * Return the canonical form of the main {@link URL}.
 261          *
 262          * @param source
 263          *            the source {@link URL}
 264          *
 265          * @return the canonical form of this {@link URL}
 266          *
 267          * @throws IOException
 268          *             in case of I/O error
 269          */
 270         public URL getCanonicalUrl(URL source) throws IOException {
 271                 return source;
 272         }
 273
 274         /**
 275          * Process the given story resource into a partially filled {@link Story}
 276          * object containing the name and metadata, except for the description.
 277          *
 278          * @param url
 279          *            the story resource
 280          *
 281          * @return the {@link Story}
 282          *
 283          * @throws IOException
 284          *             in case of I/O error
 285          */
 286         public Story processMeta(URL url) throws IOException {
 287                 return processMeta(url, true, false);
 288         }
 289
 290         /**
 291          * Process the given story resource into a partially filled {@link Story}
 292          * object containing the name and metadata.
 293          *
 294          * @param url
 295          *            the story resource
 296          *
 297          * @param close
 298          *            close "this" and "in" when done
 299          *
 300          * @return the {@link Story}
 301          *
 302          * @throws IOException
 303          *             in case of I/O error
 304          */
 305         protected Story processMeta(URL url, boolean close, boolean getDesc)
 306                         throws IOException {
 307                 url = getCanonicalUrl(url);
 308
 309                 setCurrentReferer(url);
 310
 311                 in = openInput(url);
 312                 if (in == null) {
 313                         return null;
 314                 }
 315
 316                 try {
 317                         preprocess(url, getInput());
 318
 319                         Story story = new Story();
 320                         MetaData meta = getMeta(url, getInput());
 321                         story.setMeta(meta);
 322
 323                         if (meta != null && meta.getCover() == null) {
 324                                 meta.setCover(getDefaultCover(meta.getSubject()));
 325                         }
 326
 327                         if (getDesc) {
 328                                 String descChapterName = Instance.getTrans().getString(
 329                                                 StringId.DESCRIPTION);
 330                                 story.getMeta().setResume(
 331                                                 makeChapter(url, 0, descChapterName,
 332                                                                 getDesc(url, getInput())));
 333                         }
 334
 335                         return story;
 336                 } finally {
 337                         if (close) {
 338                                 try {
 339                                         close();
 340                                 } catch (IOException e) {
 341                                         Instance.syserr(e);
 342                                 }
 343
 344                                 if (in != null) {
 345                                         in.close();
 346                                 }
 347                         }
 348
 349                         setCurrentReferer(null);
 350                 }
 351         }
 352
 353         /**
 354          * Process the given story resource into a fully filled {@link Story}
 355          * object.
 356          *
 357          * @param url
 358          *            the story resource
 359          * @param pg
 360          *            the optional progress reporter
 361          *
 362          * @return the {@link Story}
 363          *
 364          * @throws IOException
 365          *             in case of I/O error
 366          */
 367         public Story process(URL url, Progress pg) throws IOException {
 368                 if (pg == null) {
 369                         pg = new Progress();
 370                 } else {
 371                         pg.setMinMax(0, 100);
 372                 }
 373
 374                 url = getCanonicalUrl(url);
 375                 pg.setProgress(1);
 376                 try {
 377                         Story story = processMeta(url, false, true);
 378                         pg.setProgress(10);
 379                         if (story == null) {
 380                                 pg.setProgress(100);
 381                                 return null;
 382                         }
 383
 384                         setCurrentReferer(url);
 385
 386                         story.setChapters(new ArrayList<Chapter>());
 387
 388                         List<Entry<String, URL>> chapters = getChapters(url, getInput());
 389                         pg.setProgress(20);
 390
 391                         int i = 1;
 392                         if (chapters != null) {
 393                                 Progress pgChaps = new Progress(0, chapters.size());
 394                                 pg.addProgress(pgChaps, 80);
 395
 396                                 for (Entry<String, URL> chap : chapters) {
 397                                         setCurrentReferer(chap.getValue());
 398                                         InputStream chapIn = Instance.getCache().open(
 399                                                         chap.getValue(), this, true);
 400                                         try {
 401                                                 story.getChapters().add(
 402                                                                 makeChapter(url, i, chap.getKey(),
 403                                                                                 getChapterContent(url, chapIn, i)));
 404                                         } finally {
 405                                                 chapIn.close();
 406                                         }
 407
 408                                         pgChaps.setProgress(i++);
 409                                 }
 410                         } else {
 411                                 pg.setProgress(100);
 412                         }
 413
 414                         return story;
 415
 416                 } finally {
 417                         try {
 418                                 close();
 419                         } catch (IOException e) {
 420                                 Instance.syserr(e);
 421                         }
 422
 423                         if (in != null) {
 424                                 in.close();
 425                         }
 426
 427                         setCurrentReferer(null);
 428                 }
 429         }
 430
 431         /**
 432          * The support type.
 433          *
 434          * @return the type
 435          */
 436         public SupportType getType() {
 437                 return type;
 438         }
 439
 440         /**
 441          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 442          * the current {@link URL} we work on.
 443          *
 444          * @return the referer
 445          */
 446         public URL getCurrentReferer() {
 447                 return currentReferer;
 448         }
 449
 450         /**
 451          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 452          * the current {@link URL} we work on.
 453          *
 454          * @param currentReferer
 455          *            the new referer
 456          */
 457         protected void setCurrentReferer(URL currentReferer) {
 458                 this.currentReferer = currentReferer;
 459         }
 460
 461         /**
 462          * The support type.
 463          *
 464          * @param type
 465          *            the new type
 466          *
 467          * @return this
 468          */
 469         protected BasicSupport setType(SupportType type) {
 470                 this.type = type;
 471                 return this;
 472         }
 473
 474         /**
 475          * Prepare the support if needed before processing.
 476          *
 477          * @param source
 478          *            the source of the story
 479          * @param in
 480          *            the input (the main resource)
 481          *
 482          * @throws IOException
 483          *             on I/O error
 484          */
 485         protected void preprocess(URL source, InputStream in) throws IOException {
 486         }
 487
 488         /**
 489          * Now that we have processed the {@link Story}, close the resources if any.
 490          *
 491          * @throws IOException
 492          *             on I/O error
 493          */
 494         protected void close() throws IOException {
 495         }
 496
 497         /**
 498          * Create a {@link Chapter} object from the given information, formatting
 499          * the content as it should be.
 500          *
 501          * @param number
 502          *            the chapter number
 503          * @param name
 504          *            the chapter name
 505          * @param content
 506          *            the chapter content
 507          *
 508          * @return the {@link Chapter}
 509          *
 510          * @throws IOException
 511          *             in case of I/O error
 512          */
 513         protected Chapter makeChapter(URL source, int number, String name,
 514                         String content) throws IOException {
 515                 // Chapter name: process it correctly, then remove the possible
 516                 // redundant "Chapter x: " in front of it
 517                 String chapterName = processPara(name).getContent().trim();
 518                 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
 519                                 .split(",")) {
 520                         String chapterWord = Instance.getConfig().getStringX(
 521                                         Config.CHAPTER, lang);
 522                         if (chapterName.startsWith(chapterWord)) {
 523                                 chapterName = chapterName.substring(chapterWord.length())
 524                                                 .trim();
 525                                 break;
 526                         }
 527                 }
 528
 529                 if (chapterName.startsWith(Integer.toString(number))) {
 530                         chapterName = chapterName.substring(
 531                                         Integer.toString(number).length()).trim();
 532                 }
 533
 534                 if (chapterName.startsWith(":")) {
 535                         chapterName = chapterName.substring(1).trim();
 536                 }
 537                 //
 538
 539                 Chapter chap = new Chapter(number, chapterName);
 540
 541                 if (content != null) {
 542                         chap.setParagraphs(makeParagraphs(source, content));
 543                 }
 544
 545                 return chap;
 546
 547         }
 548
 549         /**
 550          * Convert the given content into {@link Paragraph}s.
 551          *
 552          * @param source
 553          *            the source URL of the story
 554          * @param content
 555          *            the textual content
 556          *
 557          * @return the {@link Paragraph}s
 558          *
 559          * @throws IOException
 560          *             in case of I/O error
 561          */
 562         protected List<Paragraph> makeParagraphs(URL source, String content)
 563                         throws IOException {
 564                 if (isHtml()) {
 565                         // Special <HR> processing:
 566                         content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
 567                                         "\n* * *\n");
 568                 }
 569
 570                 List<Paragraph> paras = new ArrayList<Paragraph>();
 571                 InputStream in = new ByteArrayInputStream(content.getBytes("UTF-8"));
 572                 try {
 573                         BufferedReader buff = new BufferedReader(new InputStreamReader(in,
 574                                         "UTF-8"));
 575
 576                         for (String encodedLine = buff.readLine(); encodedLine != null; encodedLine = buff
 577                                         .readLine()) {
 578                                 String lines[];
 579                                 if (isHtml()) {
 580                                         lines = encodedLine.split("(<p>|</p>|<br>|<br/>|\\n)");
 581                                 } else {
 582                                         lines = new String[] { encodedLine };
 583                                 }
 584
 585                                 for (String aline : lines) {
 586                                         String line = aline.trim();
 587
 588                                         URL image = null;
 589                                         if (line.startsWith("[") && line.endsWith("]")) {
 590                                                 image = getImageUrl(this, source,
 591                                                                 line.substring(1, line.length() - 1).trim());
 592                                         }
 593
 594                                         if (image != null) {
 595                                                 paras.add(new Paragraph(image));
 596                                         } else {
 597                                                 paras.add(processPara(line));
 598                                         }
 599                                 }
 600                         }
 601                 } finally {
 602                         in.close();
 603                 }
 604
 605                 // Check quotes for "bad" format
 606                 List<Paragraph> newParas = new ArrayList<Paragraph>();
 607                 for (Paragraph para : paras) {
 608                         newParas.addAll(requotify(para));
 609                 }
 610                 paras = newParas;
 611
 612                 // Remove double blanks/brks
 613                 fixBlanksBreaks(paras);
 614
 615                 return paras;
 616         }
 617
 618         /**
 619          * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
 620          * those {@link Paragraph}s.
 621          * <p>
 622          * The resulting list will not contain a starting or trailing blank/break
 623          * nor 2 blanks or breaks following each other.
 624          *
 625          * @param paras
 626          *            the list of {@link Paragraph}s to fix
 627          */
 628         protected void fixBlanksBreaks(List<Paragraph> paras) {
 629                 boolean space = false;
 630                 boolean brk = true;
 631                 for (int i = 0; i < paras.size(); i++) {
 632                         Paragraph para = paras.get(i);
 633                         boolean thisSpace = para.getType() == ParagraphType.BLANK;
 634                         boolean thisBrk = para.getType() == ParagraphType.BREAK;
 635
 636                         if (i > 0 && space && thisBrk) {
 637                                 paras.remove(i - 1);
 638                                 i--;
 639                         } else if ((space || brk) && (thisSpace || thisBrk)) {
 640                                 paras.remove(i);
 641                                 i--;
 642                         }
 643
 644                         space = thisSpace;
 645                         brk = thisBrk;
 646                 }
 647
 648                 // Remove blank/brk at start
 649                 if (paras.size() > 0
 650                                 && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
 651                                                 0).getType() == ParagraphType.BREAK)) {
 652                         paras.remove(0);
 653                 }
 654
 655                 // Remove blank/brk at end
 656                 int last = paras.size() - 1;
 657                 if (paras.size() > 0
 658                                 && (paras.get(last).getType() == ParagraphType.BLANK || paras
 659                                                 .get(last).getType() == ParagraphType.BREAK)) {
 660                         paras.remove(last);
 661                 }
 662         }
 663
 664         /**
 665          * Get the default cover related to this subject (see <tt>.info</tt> files).
 666          *
 667          * @param subject
 668          *            the subject
 669          *
 670          * @return the cover if any, or NULL
 671          */
 672         static BufferedImage getDefaultCover(String subject) {
 673                 if (subject != null && !subject.isEmpty()
 674                                 && Instance.getCoverDir() != null) {
 675                         try {
 676                                 File fileCover = new File(Instance.getCoverDir(), subject);
 677                                 return getImage(null, fileCover.toURI().toURL(), subject);
 678                         } catch (MalformedURLException e) {
 679                         }
 680                 }
 681
 682                 return null;
 683         }
 684
 685         /**
 686          * Return the list of supported image extensions.
 687          *
 688          * @param emptyAllowed
 689          *            TRUE to allow an empty extension on first place, which can be
 690          *            used when you may already have an extension in your input but
 691          *            are not sure about it
 692          *
 693          * @return the extensions
 694          */
 695         static String[] getImageExt(boolean emptyAllowed) {
 696                 if (emptyAllowed) {
 697                         return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 698                 } else {
 699                         return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 700                 }
 701         }
 702
 703         /**
 704          * Check if the given resource can be a local image or a remote image, then
 705          * refresh the cache with it if it is.
 706          *
 707          * @param source
 708          *            the story source
 709          * @param line
 710          *            the resource to check
 711          *
 712          * @return the image if found, or NULL
 713          *
 714          */
 715         static BufferedImage getImage(BasicSupport support, URL source, String line) {
 716                 URL url = getImageUrl(support, source, line);
 717                 if (url != null) {
 718                         InputStream in = null;
 719                         try {
 720                                 in = Instance.getCache().open(url, getSupport(url), true);
 721                                 return IOUtils.toImage(in);
 722                         } catch (IOException e) {
 723                         } finally {
 724                                 if (in != null) {
 725                                         try {
 726                                                 in.close();
 727                                         } catch (IOException e) {
 728                                         }
 729                                 }
 730                         }
 731                 }
 732
 733                 return null;
 734         }
 735
 736         /**
 737          * Check if the given resource can be a local image or a remote image, then
 738          * refresh the cache with it if it is.
 739          *
 740          * @param source
 741          *            the story source
 742          * @param line
 743          *            the resource to check
 744          *
 745          * @return the image URL if found, or NULL
 746          *
 747          */
 748         static URL getImageUrl(BasicSupport support, URL source, String line) {
 749                 URL url = null;
 750
 751                 if (line != null) {
 752                         // try for files
 753                         String path = null;
 754                         if (source != null) {
 755                                 path = new File(source.getFile()).getParent();
 756                                 try {
 757                                         String basePath = new File(new File(path), line.trim())
 758                                                         .getAbsolutePath();
 759                                         for (String ext : getImageExt(true)) {
 760                                                 if (new File(basePath + ext).exists()) {
 761                                                         url = new File(basePath + ext).toURI().toURL();
 762                                                 }
 763                                         }
 764                                 } catch (Exception e) {
 765                                         // Nothing to do here
 766                                 }
 767                         }
 768
 769                         if (url == null) {
 770                                 // try for URLs
 771                                 try {
 772                                         for (String ext : getImageExt(true)) {
 773                                                 if (Instance.getCache().check(new URL(line + ext))) {
 774                                                         url = new URL(line + ext);
 775                                                         break;
 776                                                 }
 777                                         }
 778
 779                                         // try out of cache
 780                                         if (url == null) {
 781                                                 for (String ext : getImageExt(true)) {
 782                                                         try {
 783                                                                 url = new URL(line + ext);
 784                                                                 Instance.getCache().refresh(url, support, true);
 785                                                                 break;
 786                                                         } catch (IOException e) {
 787                                                                 // no image with this ext
 788                                                                 url = null;
 789                                                         }
 790                                                 }
 791                                         }
 792                                 } catch (MalformedURLException e) {
 793                                         // Not an url
 794                                 }
 795                         }
 796
 797                         // refresh the cached file
 798                         if (url != null) {
 799                                 try {
 800                                         Instance.getCache().refresh(url, support, true);
 801                                 } catch (IOException e) {
 802                                         // woops, broken image
 803                                         url = null;
 804                                 }
 805                         }
 806                 }
 807
 808                 return url;
 809         }
 810
 811         /**
 812          * Open the input file that will be used through the support.
 813          *
 814          * @param source
 815          *            the source {@link URL}
 816          *
 817          * @return the {@link InputStream}
 818          *
 819          * @throws IOException
 820          *             in case of I/O error
 821          */
 822         protected InputStream openInput(URL source) throws IOException {
 823                 return Instance.getCache().open(source, this, false);
 824         }
 825
 826         /**
 827          * Reset the given {@link InputStream} and return it.
 828          *
 829          * @param in
 830          *            the {@link InputStream} to reset
 831          *
 832          * @return the same {@link InputStream} after reset
 833          */
 834         protected InputStream reset(InputStream in) {
 835                 try {
 836                         in.reset();
 837                 } catch (IOException e) {
 838                 }
 839                 return in;
 840         }
 841
 842         /**
 843          * Reset then return {@link BasicSupport#in}.
 844          *
 845          * @return {@link BasicSupport#in}
 846          */
 847         protected InputStream getInput() {
 848                 return reset(in);
 849         }
 850
 851         /**
 852          * Fix the author name if it is prefixed with some "by" {@link String}.
 853          *
 854          * @param author
 855          *            the author with a possible prefix
 856          *
 857          * @return the author without prefixes
 858          */
 859         protected String fixAuthor(String author) {
 860                 if (author != null) {
 861                         for (String suffix : new String[] { " ", ":" }) {
 862                                 for (String byString : Instance.getConfig()
 863                                                 .getString(Config.BYS).split(",")) {
 864                                         byString += suffix;
 865                                         if (author.toUpperCase().startsWith(byString.toUpperCase())) {
 866                                                 author = author.substring(byString.length()).trim();
 867                                         }
 868                                 }
 869                         }
 870
 871                         // Special case (without suffix):
 872                         if (author.startsWith("©")) {
 873                                 author = author.substring(1);
 874                         }
 875                 }
 876
 877                 return author;
 878         }
 879
 880         /**
 881          * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
 882          * and requotify them (i.e., separate them into QUOTE paragraphs and other
 883          * paragraphs (quotes or not)).
 884          *
 885          * @param para
 886          *            the paragraph to requotify (not necessarily a quote)
 887          *
 888          * @return the correctly (or so we hope) quotified paragraphs
 889          */
 890         protected List<Paragraph> requotify(Paragraph para) {
 891                 List<Paragraph> newParas = new ArrayList<Paragraph>();
 892
 893                 if (para.getType() == ParagraphType.QUOTE
 894                                 && para.getContent().length() > 2) {
 895                         String line = para.getContent();
 896                         boolean singleQ = line.startsWith("" + openQuote);
 897                         boolean doubleQ = line.startsWith("" + openDoubleQuote);
 898
 899                         // Do not try when more than one quote at a time
 900                         // (some stories are not easily readable if we do)
 901                         if (singleQ
 902                                         && line.indexOf(closeQuote, 1) < line
 903                                                         .lastIndexOf(closeQuote)) {
 904                                 newParas.add(para);
 905                                 return newParas;
 906                         }
 907                         if (doubleQ
 908                                         && line.indexOf(closeDoubleQuote, 1) < line
 909                                                         .lastIndexOf(closeDoubleQuote)) {
 910                                 newParas.add(para);
 911                                 return newParas;
 912                         }
 913                         //
 914
 915                         if (!singleQ && !doubleQ) {
 916                                 line = openDoubleQuote + line + closeDoubleQuote;
 917                                 newParas.add(new Paragraph(ParagraphType.QUOTE, line));
 918                         } else {
 919                                 char open = singleQ ? openQuote : openDoubleQuote;
 920                                 char close = singleQ ? closeQuote : closeDoubleQuote;
 921
 922                                 int posDot = -1;
 923                                 boolean inQuote = false;
 924                                 int i = 0;
 925                                 for (char car : line.toCharArray()) {
 926                                         if (car == open) {
 927                                                 inQuote = true;
 928                                         } else if (car == close) {
 929                                                 inQuote = false;
 930                                         } else if (car == '.' && !inQuote) {
 931                                                 posDot = i;
 932                                                 break;
 933                                         }
 934                                         i++;
 935                                 }
 936
 937                                 if (posDot >= 0) {
 938                                         String rest = line.substring(posDot + 1).trim();
 939                                         line = line.substring(0, posDot + 1).trim();
 940                                         newParas.add(new Paragraph(ParagraphType.QUOTE, line));
 941                                         if (!rest.isEmpty()) {
 942                                                 newParas.addAll(requotify(processPara(rest)));
 943                                         }
 944                                 } else {
 945                                         newParas.add(para);
 946                                 }
 947                         }
 948                 } else {
 949                         newParas.add(para);
 950                 }
 951
 952                 return newParas;
 953         }
 954
 955         /**
 956          * Process a {@link Paragraph} from a raw line of text.
 957          * <p>
 958          * Will also fix quotes and HTML encoding if needed.
 959          *
 960          * @param line
 961          *            the raw line
 962          *
 963          * @return the processed {@link Paragraph}
 964          */
 965         protected Paragraph processPara(String line) {
 966                 line = ifUnhtml(line).trim();
 967
 968                 boolean space = true;
 969                 boolean brk = true;
 970                 boolean quote = false;
 971                 boolean tentativeCloseQuote = false;
 972                 char prev = '\0';
 973                 int dashCount = 0;
 974
 975                 StringBuilder builder = new StringBuilder();
 976                 for (char car : line.toCharArray()) {
 977                         if (car != '-') {
 978                                 if (dashCount > 0) {
 979                                         // dash, ndash and mdash: - – —
 980                                         // currently: always use mdash
 981                                         builder.append(dashCount == 1 ? '-' : '—');
 982                                 }
 983                                 dashCount = 0;
 984                         }
 985
 986                         if (tentativeCloseQuote) {
 987                                 tentativeCloseQuote = false;
 988                                 if (Character.isLetterOrDigit(car)) {
 989                                         builder.append("'");
 990                                 } else {
 991                                         // handle double-single quotes as double quotes
 992                                         if (prev == car) {
 993                                                 builder.append(closeDoubleQuote);
 994                                                 continue;
 995                                         } else {
 996                                                 builder.append(closeQuote);
 997                                         }
 998                                 }
 999                         }
1000
1001                         switch (car) {
1002                         case ' ': // note: unbreakable space
1003                         case ' ':
1004                         case '\t':
1005                         case '\n': // just in case
1006                         case '\r': // just in case
1007                                 builder.append(' ');
1008                                 break;
1009
1010                         case '\'':
1011                                 if (space || (brk && quote)) {
1012                                         quote = true;
1013                                         // handle double-single quotes as double quotes
1014                                         if (prev == car) {
1015                                                 builder.deleteCharAt(builder.length() - 1);
1016                                                 builder.append(openDoubleQuote);
1017                                         } else {
1018                                                 builder.append(openQuote);
1019                                         }
1020                                 } else if (prev == ' ' || prev == car) {
1021                                         // handle double-single quotes as double quotes
1022                                         if (prev == car) {
1023                                                 builder.deleteCharAt(builder.length() - 1);
1024                                                 builder.append(openDoubleQuote);
1025                                         } else {
1026                                                 builder.append(openQuote);
1027                                         }
1028                                 } else {
1029                                         // it is a quote ("I'm off") or a 'quote' ("This
1030                                         // 'good' restaurant"...)
1031                                         tentativeCloseQuote = true;
1032                                 }
1033                                 break;
1034
1035                         case '"':
1036                                 if (space || (brk && quote)) {
1037                                         quote = true;
1038                                         builder.append(openDoubleQuote);
1039                                 } else if (prev == ' ') {
1040                                         builder.append(openDoubleQuote);
1041                                 } else {
1042                                         builder.append(closeDoubleQuote);
1043                                 }
1044                                 break;
1045
1046                         case '-':
1047                                 if (space) {
1048                                         quote = true;
1049                                 } else {
1050                                         dashCount++;
1051                                 }
1052                                 space = false;
1053                                 break;
1054
1055                         case '*':
1056                         case '~':
1057                         case '/':
1058                         case '\\':
1059                         case '<':
1060                         case '>':
1061                         case '=':
1062                         case '+':
1063                         case '_':
1064                         case '–':
1065                         case '—':
1066                                 space = false;
1067                                 builder.append(car);
1068                                 break;
1069
1070                         case '‘':
1071                         case '`':
1072                         case '‹':
1073                         case '﹁':
1074                         case '〈':
1075                         case '「':
1076                                 if (space || (brk && quote)) {
1077                                         quote = true;
1078                                         builder.append(openQuote);
1079                                 } else {
1080                                         // handle double-single quotes as double quotes
1081                                         if (prev == car) {
1082                                                 builder.deleteCharAt(builder.length() - 1);
1083                                                 builder.append(openDoubleQuote);
1084                                         } else {
1085                                                 builder.append(openQuote);
1086                                         }
1087                                 }
1088                                 space = false;
1089                                 brk = false;
1090                                 break;
1091
1092                         case '’':
1093                         case '›':
1094                         case '﹂':
1095                         case '〉':
1096                         case '」':
1097                                 space = false;
1098                                 brk = false;
1099                                 // handle double-single quotes as double quotes
1100                                 if (prev == car) {
1101                                         builder.deleteCharAt(builder.length() - 1);
1102                                         builder.append(closeDoubleQuote);
1103                                 } else {
1104                                         builder.append(closeQuote);
1105                                 }
1106                                 break;
1107
1108                         case '«':
1109                         case '“':
1110                         case '﹃':
1111                         case '《':
1112                         case '『':
1113                                 if (space || (brk && quote)) {
1114                                         quote = true;
1115                                         builder.append(openDoubleQuote);
1116                                 } else {
1117                                         builder.append(openDoubleQuote);
1118                                 }
1119                                 space = false;
1120                                 brk = false;
1121                                 break;
1122
1123                         case '»':
1124                         case '”':
1125                         case '﹄':
1126                         case '》':
1127                         case '』':
1128                                 space = false;
1129                                 brk = false;
1130                                 builder.append(closeDoubleQuote);
1131                                 break;
1132
1133                         default:
1134                                 space = false;
1135                                 brk = false;
1136                                 builder.append(car);
1137                                 break;
1138                         }
1139
1140                         prev = car;
1141                 }
1142
1143                 if (tentativeCloseQuote) {
1144                         tentativeCloseQuote = false;
1145                         builder.append(closeQuote);
1146                 }
1147
1148                 line = builder.toString().trim();
1149
1150                 ParagraphType type = ParagraphType.NORMAL;
1151                 if (space) {
1152                         type = ParagraphType.BLANK;
1153                 } else if (brk) {
1154                         type = ParagraphType.BREAK;
1155                 } else if (quote) {
1156                         type = ParagraphType.QUOTE;
1157                 }
1158
1159                 return new Paragraph(type, line);
1160         }
1161
1162         /**
1163          * Remove the HTML from the input <b>if</b> {@link BasicSupport#isHtml()} is
1164          * true.
1165          *
1166          * @param input
1167          *            the input
1168          *
1169          * @return the no html version if needed
1170          */
1171         private String ifUnhtml(String input) {
1172                 if (isHtml() && input != null) {
1173                         return StringUtils.unhtml(input);
1174                 }
1175
1176                 return input;
1177         }
1178
1179         /**
1180          * Return a {@link BasicSupport} implementation supporting the given
1181          * resource if possible.
1182          *
1183          * @param url
1184          *            the story resource
1185          *
1186          * @return an implementation that supports it, or NULL
1187          */
1188         public static BasicSupport getSupport(URL url) {
1189                 if (url == null) {
1190                         return null;
1191                 }
1192
1193                 // TEXT and INFO_TEXT always support files (not URLs though)
1194                 for (SupportType type : SupportType.values()) {
1195                         if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1196                                 BasicSupport support = getSupport(type);
1197                                 if (support != null && support.supports(url)) {
1198                                         return support;
1199                                 }
1200                         }
1201                 }
1202
1203                 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
1204                                 SupportType.TEXT }) {
1205                         BasicSupport support = getSupport(type);
1206                         if (support != null && support.supports(url)) {
1207                                 return support;
1208                         }
1209                 }
1210
1211                 return null;
1212         }
1213
1214         /**
1215          * Return a {@link BasicSupport} implementation supporting the given type.
1216          *
1217          * @param type
1218          *            the type
1219          *
1220          * @return an implementation that supports it, or NULL
1221          */
1222         public static BasicSupport getSupport(SupportType type) {
1223                 switch (type) {
1224                 case EPUB:
1225                         return new Epub().setType(type);
1226                 case INFO_TEXT:
1227                         return new InfoText().setType(type);
1228                 case FIMFICTION:
1229                         return new Fimfiction().setType(type);
1230                 case FANFICTION:
1231                         return new Fanfiction().setType(type);
1232                 case TEXT:
1233                         return new Text().setType(type);
1234                 case MANGAFOX:
1235                         return new MangaFox().setType(type);
1236                 case E621:
1237                         return new E621().setType(type);
1238                 case YIFFSTAR:
1239                         return new YiffStar().setType(type);
1240                 case CBZ:
1241                         return new Cbz().setType(type);
1242                 case HTML:
1243                         return new Html().setType(type);
1244                 }
1245
1246                 return null;
1247         }
1248
1249         /**
1250          * Return the first line from the given input which correspond to the given
1251          * selectors.
1252          *
1253          * @param in
1254          *            the input
1255          * @param needle
1256          *            a string that must be found inside the target line (also
1257          *            supports "^" at start to say "only if it starts with" the
1258          *            needle)
1259          * @param relativeLine
1260          *            the line to return based upon the target line position (-1 =
1261          *            the line before, 0 = the target line...)
1262          *
1263          * @return the line
1264          */
1265         static String getLine(InputStream in, String needle, int relativeLine) {
1266                 return getLine(in, needle, relativeLine, true);
1267         }
1268
1269         /**
1270          * Return a line from the given input which correspond to the given
1271          * selectors.
1272          *
1273          * @param in
1274          *            the input
1275          * @param needle
1276          *            a string that must be found inside the target line (also
1277          *            supports "^" at start to say "only if it starts with" the
1278          *            needle)
1279          * @param relativeLine
1280          *            the line to return based upon the target line position (-1 =
1281          *            the line before, 0 = the target line...)
1282          * @param first
1283          *            takes the first result (as opposed to the last one, which will
1284          *            also always spend the input)
1285          *
1286          * @return the line
1287          */
1288         static String getLine(InputStream in, String needle, int relativeLine,
1289                         boolean first) {
1290                 String rep = null;
1291
1292                 try {
1293                         in.reset();
1294                 } catch (IOException e) {
1295                         Instance.syserr(e);
1296                 }
1297
1298                 List<String> lines = new ArrayList<String>();
1299                 @SuppressWarnings("resource")
1300                 Scanner scan = new Scanner(in, "UTF-8");
1301                 int index = -1;
1302                 scan.useDelimiter("\\n");
1303                 while (scan.hasNext()) {
1304                         lines.add(scan.next());
1305
1306                         if (index == -1) {
1307                                 if (needle.startsWith("^")) {
1308                                         if (lines.get(lines.size() - 1).startsWith(
1309                                                         needle.substring(1))) {
1310                                                 index = lines.size() - 1;
1311                                         }
1312
1313                                 } else {
1314                                         if (lines.get(lines.size() - 1).contains(needle)) {
1315                                                 index = lines.size() - 1;
1316                                         }
1317                                 }
1318                         }
1319
1320                         if (index >= 0 && index + relativeLine < lines.size()) {
1321                                 rep = lines.get(index + relativeLine);
1322                                 if (first) {
1323                                         break;
1324                                 }
1325                         }
1326                 }
1327
1328                 return rep;
1329         }
1330 }