src/be/nikiroo/fanfix/supported/BasicSupport.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.awt.image.BufferedImage;
   4 import java.io.BufferedReader;
   5 import java.io.ByteArrayInputStream;
   6 import java.io.File;
   7 import java.io.IOException;
   8 import java.io.InputStream;
   9 import java.io.InputStreamReader;
  10 import java.net.MalformedURLException;
  11 import java.net.URL;
  12 import java.util.ArrayList;
  13 import java.util.HashMap;
  14 import java.util.List;
  15 import java.util.Map;
  16 import java.util.Map.Entry;
  17 import java.util.Scanner;
  18
  19 import be.nikiroo.fanfix.Instance;
  20 import be.nikiroo.fanfix.bundles.Config;
  21 import be.nikiroo.fanfix.bundles.StringId;
  22 import be.nikiroo.fanfix.data.Chapter;
  23 import be.nikiroo.fanfix.data.MetaData;
  24 import be.nikiroo.fanfix.data.Paragraph;
  25 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
  26 import be.nikiroo.fanfix.data.Story;
  27 import be.nikiroo.utils.IOUtils;
  28 import be.nikiroo.utils.Progress;
  29 import be.nikiroo.utils.StringUtils;
  30
  31 /**
 * This class is the base class used by the other support classes. It can be
 * used outside of this package, and has static methods that you can use to
 * get access to the correct support class.
  35  * <p>
  36  * It will be used with 'resources' (usually web pages or files).
  37  *
  38  * @author niki
  39  */
  40 public abstract class BasicSupport {
  41         /**
  42          * The supported input types for which we can get a {@link BasicSupport}
  43          * object.
  44          *
  45          * @author niki
  46          */
  47         public enum SupportType {
  48                 /** EPUB files created with this program */
  49                 EPUB,
  50                 /** Pure text file with some rules */
  51                 TEXT,
  52                 /** TEXT but with associated .info file */
  53                 INFO_TEXT,
  54                 /** My Little Pony fanfictions */
  55                 FIMFICTION,
  56                 /** Fanfictions from a lot of different universes */
  57                 FANFICTION,
  58                 /** Website with lots of Mangas */
  59                 MANGAFOX,
  60                 /** Furry website with comics support */
  61                 E621,
  62                 /** Furry website with stories */
  63                 YIFFSTAR,
  64                 /** CBZ files */
  65                 CBZ,
  66                 /** HTML files */
  67                 HTML;
  68
  69                 /**
  70                  * A description of this support type (more information than the
  71                  * {@link BasicSupport#getSourceName()}).
  72                  *
  73                  * @return the description
  74                  */
  75                 public String getDesc() {
  76                         String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
  77                                         this.name());
  78
  79                         if (desc == null) {
  80                                 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
  81                         }
  82
  83                         return desc;
  84                 }
  85
  86                 /**
  87                  * The name of this support type (a short version).
  88                  *
  89                  * @return the name
  90                  */
  91                 public String getSourceName() {
  92                         BasicSupport support = BasicSupport.getSupport(this);
  93                         if (support != null) {
  94                                 return support.getSourceName();
  95                         }
  96
  97                         return null;
  98                 }
  99
 100                 @Override
 101                 public String toString() {
 102                         return super.toString().toLowerCase();
 103                 }
 104
 105                 /**
 106                  * Call {@link SupportType#valueOf(String.toUpperCase())}.
 107                  *
 108                  * @param typeName
 109                  *            the possible type name
 110                  *
 111                  * @return NULL or the type
 112                  */
 113                 public static SupportType valueOfUC(String typeName) {
 114                         return SupportType.valueOf(typeName == null ? null : typeName
 115                                         .toUpperCase());
 116                 }
 117
 118                 /**
 119                  * Call {@link SupportType#valueOf(String.toUpperCase())} but return
 120                  * NULL for NULL instead of raising exception.
 121                  *
 122                  * @param typeName
 123                  *            the possible type name
 124                  *
 125                  * @return NULL or the type
 126                  */
 127                 public static SupportType valueOfNullOkUC(String typeName) {
 128                         if (typeName == null) {
 129                                 return null;
 130                         }
 131
 132                         return SupportType.valueOfUC(typeName);
 133                 }
 134
 135                 /**
 136                  * Call {@link SupportType#valueOf(String.toUpperCase())} but return
 137                  * NULL in case of error instead of raising an exception.
 138                  *
 139                  * @param typeName
 140                  *            the possible type name
 141                  *
 142                  * @return NULL or the type
 143                  */
 144                 public static SupportType valueOfAllOkUC(String typeName) {
 145                         try {
 146                                 return SupportType.valueOfUC(typeName);
 147                         } catch (Exception e) {
 148                                 return null;
 149                         }
 150                 }
 151         }
 152
        // The main input of the resource currently processed (assigned in
        // processMeta, closed when the processing is done)
        private InputStream in;
        // The support type of this instance (see setType)
        private SupportType type;
        private URL currentReferer; // with only one 'r', as in 'HTTP'...

        // quote chars, read once from the translation bundle when the support is
        // created
        private char openQuote = Instance.getTrans().getChar(
                        StringId.OPEN_SINGLE_QUOTE);
        private char closeQuote = Instance.getTrans().getChar(
                        StringId.CLOSE_SINGLE_QUOTE);
        private char openDoubleQuote = Instance.getTrans().getChar(
                        StringId.OPEN_DOUBLE_QUOTE);
        private char closeDoubleQuote = Instance.getTrans().getChar(
                        StringId.CLOSE_DOUBLE_QUOTE);
 166
        /**
         * The name of this support class.
         * <p>
         * This is the value returned by {@link SupportType#getSourceName()} for
         * the corresponding type.
         *
         * @return the name
         */
        protected abstract String getSourceName();
 173
        /**
         * Check if the given resource is supported by this {@link BasicSupport}.
         *
         * @param url
         *            the resource to check for
         *
         * @return TRUE if this support can handle the resource
         */
        protected abstract boolean supports(URL url);
 183
        /**
         * Return TRUE if the support will return HTML encoded content values for
         * the chapters content.
         * <p>
         * When TRUE, the content is split on paragraph/line-break tags and the
         * <tt>&lt;hr&gt;</tt> tags are converted into break lines when the
         * paragraphs are created.
         *
         * @return TRUE for HTML
         */
        protected abstract boolean isHtml();
 191
        /**
         * Return the metadata of the story.
         * <p>
         * It is called once the input has been opened and preprocessed; if the
         * returned metadata has no cover, a default cover based upon the subject
         * will be looked up.
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input (the main resource)
         *
         * @return the metadata
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract MetaData getMeta(URL source, InputStream in)
                        throws IOException;
 194
        /**
         * Return the story description.
         * <p>
         * The returned text is converted into a {@link Chapter} (number 0) that
         * is stored as the resume of the story metadata.
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input (the main resource)
         *
         * @return the description
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract String getDesc(URL source, InputStream in)
                        throws IOException;
 210
        /**
         * Return the list of chapters (name and resource).
         * <p>
         * Each entry key is the chapter name and each value is the resource that
         * will be opened to read the content of this chapter.
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input (the main resource)
         *
         * @return the chapters (NULL is accepted and means "no chapter")
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract List<Entry<String, URL>> getChapters(URL source,
                        InputStream in) throws IOException;
 226
        /**
         * Return the content of the chapter (possibly HTML encoded, if
         * {@link BasicSupport#isHtml()} is TRUE).
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input of the chapter (the resource listed for this
         *            chapter by
         *            {@link BasicSupport#getChapters(URL, InputStream)})
         * @param number
         *            the chapter number (the first chapter is number 1)
         *
         * @return the content
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract String getChapterContent(URL source, InputStream in,
                        int number) throws IOException;
 245
        /**
         * Log into the support (can be a no-op depending upon the support).
         * <p>
         * This default implementation does nothing; it is called first when a
         * story is processed.
         *
         * @throws IOException
         *             in case of I/O error
         */
        public void login() throws IOException {

        }
 255
 256         /**
 257          * Return the list of cookies (values included) that must be used to
 258          * correctly fetch the resources.
 259          * <p>
 260          * You are expected to call the super method implementation if you override
 261          * it.
 262          *
 263          * @return the cookies
 264          *
 265          * @throws IOException
 266          *             in case of I/O error
 267          */
 268         public Map<String, String> getCookies() throws IOException {
 269                 return new HashMap<String, String>();
 270         }
 271
 272         /**
 273          * Return the canonical form of the main {@link URL}.
 274          *
 275          * @param source
 276          *            the source {@link URL}
 277          *
 278          * @return the canonical form of this {@link URL}
 279          *
 280          * @throws IOException
 281          *             in case of I/O error
 282          */
 283         public URL getCanonicalUrl(URL source) throws IOException {
 284                 return source;
 285         }
 286
 287         /**
 288          * Process the given story resource into a partially filled {@link Story}
 289          * object containing the name and metadata, except for the description.
 290          *
 291          * @param url
 292          *            the story resource
 293          *
 294          * @return the {@link Story}
 295          *
 296          * @throws IOException
 297          *             in case of I/O error
 298          */
 299         public Story processMeta(URL url) throws IOException {
 300                 return processMeta(url, true, false);
 301         }
 302
 303         /**
 304          * Process the given story resource into a partially filled {@link Story}
 305          * object containing the name and metadata.
 306          *
 307          * @param url
 308          *            the story resource
 309          *
 310          * @param close
 311          *            close "this" and "in" when done
 312          *
 313          * @return the {@link Story}
 314          *
 315          * @throws IOException
 316          *             in case of I/O error
 317          */
 318         protected Story processMeta(URL url, boolean close, boolean getDesc)
 319                         throws IOException {
 320                 login();
 321
 322                 url = getCanonicalUrl(url);
 323
 324                 setCurrentReferer(url);
 325
 326                 in = openInput(url);
 327                 if (in == null) {
 328                         return null;
 329                 }
 330
 331                 try {
 332                         preprocess(url, getInput());
 333
 334                         Story story = new Story();
 335                         MetaData meta = getMeta(url, getInput());
 336                         story.setMeta(meta);
 337
 338                         if (meta != null && meta.getCover() == null) {
 339                                 meta.setCover(getDefaultCover(meta.getSubject()));
 340                         }
 341
 342                         if (getDesc) {
 343                                 String descChapterName = Instance.getTrans().getString(
 344                                                 StringId.DESCRIPTION);
 345                                 story.getMeta().setResume(
 346                                                 makeChapter(url, 0, descChapterName,
 347                                                                 getDesc(url, getInput())));
 348                         }
 349
 350                         return story;
 351                 } finally {
 352                         if (close) {
 353                                 try {
 354                                         close();
 355                                 } catch (IOException e) {
 356                                         Instance.syserr(e);
 357                                 }
 358
 359                                 if (in != null) {
 360                                         in.close();
 361                                 }
 362                         }
 363
 364                         setCurrentReferer(null);
 365                 }
 366         }
 367
 368         /**
 369          * Process the given story resource into a fully filled {@link Story}
 370          * object.
 371          *
 372          * @param url
 373          *            the story resource
 374          * @param pg
 375          *            the optional progress reporter
 376          *
 377          * @return the {@link Story}
 378          *
 379          * @throws IOException
 380          *             in case of I/O error
 381          */
 382         public Story process(URL url, Progress pg) throws IOException {
 383                 if (pg == null) {
 384                         pg = new Progress();
 385                 } else {
 386                         pg.setMinMax(0, 100);
 387                 }
 388
 389                 url = getCanonicalUrl(url);
 390                 pg.setProgress(1);
 391                 try {
 392                         Story story = processMeta(url, false, true);
 393                         pg.setProgress(10);
 394                         if (story == null) {
 395                                 pg.setProgress(100);
 396                                 return null;
 397                         }
 398
 399                         setCurrentReferer(url);
 400
 401                         story.setChapters(new ArrayList<Chapter>());
 402
 403                         List<Entry<String, URL>> chapters = getChapters(url, getInput());
 404                         pg.setProgress(20);
 405
 406                         int i = 1;
 407                         if (chapters != null) {
 408                                 Progress pgChaps = new Progress(0, chapters.size());
 409                                 pg.addProgress(pgChaps, 80);
 410
 411                                 for (Entry<String, URL> chap : chapters) {
 412                                         setCurrentReferer(chap.getValue());
 413                                         InputStream chapIn = Instance.getCache().open(
 414                                                         chap.getValue(), this, true);
 415                                         try {
 416                                                 story.getChapters().add(
 417                                                                 makeChapter(url, i, chap.getKey(),
 418                                                                                 getChapterContent(url, chapIn, i)));
 419                                         } finally {
 420                                                 chapIn.close();
 421                                         }
 422
 423                                         pgChaps.setProgress(i++);
 424                                 }
 425                         } else {
 426                                 pg.setProgress(100);
 427                         }
 428
 429                         return story;
 430
 431                 } finally {
 432                         try {
 433                                 close();
 434                         } catch (IOException e) {
 435                                 Instance.syserr(e);
 436                         }
 437
 438                         if (in != null) {
 439                                 in.close();
 440                         }
 441
 442                         setCurrentReferer(null);
 443                 }
 444         }
 445
 446         /**
 447          * The support type.
 448          *
 449          * @return the type
 450          */
 451         public SupportType getType() {
 452                 return type;
 453         }
 454
 455         /**
 456          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 457          * the current {@link URL} we work on.
 458          *
 459          * @return the referer
 460          */
 461         public URL getCurrentReferer() {
 462                 return currentReferer;
 463         }
 464
        /**
         * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
         * the current {@link URL} we work on.
         *
         * @param currentReferer
         *            the new referer (NULL to clear it)
         */
        protected void setCurrentReferer(URL currentReferer) {
                this.currentReferer = currentReferer;
        }
 475
        /**
         * Change the support type.
         *
         * @param type
         *            the new type
         *
         * @return this (so the call can be chained)
         */
        protected BasicSupport setType(SupportType type) {
                this.type = type;
                return this;
        }
 488
        /**
         * Prepare the support if needed before processing.
         * <p>
         * This default implementation does nothing; it is called once the main
         * input has been opened, before the metadata is read.
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input (the main resource)
         *
         * @throws IOException
         *             on I/O error
         */
        protected void preprocess(URL source, InputStream in) throws IOException {
        }
 502
        /**
         * Now that we have processed the {@link Story}, close the resources if any.
         * <p>
         * This default implementation does nothing. An {@link IOException} thrown
         * here is reported by the processing methods but does not stop them.
         *
         * @throws IOException
         *             on I/O error
         */
        protected void close() throws IOException {
        }
 511
 512         /**
 513          * Create a {@link Chapter} object from the given information, formatting
 514          * the content as it should be.
 515          *
 516          * @param number
 517          *            the chapter number
 518          * @param name
 519          *            the chapter name
 520          * @param content
 521          *            the chapter content
 522          *
 523          * @return the {@link Chapter}
 524          *
 525          * @throws IOException
 526          *             in case of I/O error
 527          */
 528         protected Chapter makeChapter(URL source, int number, String name,
 529                         String content) throws IOException {
 530                 // Chapter name: process it correctly, then remove the possible
 531                 // redundant "Chapter x: " in front of it
 532                 String chapterName = processPara(name).getContent().trim();
 533                 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
 534                                 .split(",")) {
 535                         String chapterWord = Instance.getConfig().getStringX(
 536                                         Config.CHAPTER, lang);
 537                         if (chapterName.startsWith(chapterWord)) {
 538                                 chapterName = chapterName.substring(chapterWord.length())
 539                                                 .trim();
 540                                 break;
 541                         }
 542                 }
 543
 544                 if (chapterName.startsWith(Integer.toString(number))) {
 545                         chapterName = chapterName.substring(
 546                                         Integer.toString(number).length()).trim();
 547                 }
 548
 549                 if (chapterName.startsWith(":")) {
 550                         chapterName = chapterName.substring(1).trim();
 551                 }
 552                 //
 553
 554                 Chapter chap = new Chapter(number, chapterName);
 555
 556                 if (content != null) {
 557                         chap.setParagraphs(makeParagraphs(source, content));
 558                 }
 559
 560                 return chap;
 561
 562         }
 563
 564         /**
 565          * Convert the given content into {@link Paragraph}s.
 566          *
 567          * @param source
 568          *            the source URL of the story
 569          * @param content
 570          *            the textual content
 571          *
 572          * @return the {@link Paragraph}s
 573          *
 574          * @throws IOException
 575          *             in case of I/O error
 576          */
 577         protected List<Paragraph> makeParagraphs(URL source, String content)
 578                         throws IOException {
 579                 if (isHtml()) {
 580                         // Special <HR> processing:
 581                         content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
 582                                         "\n* * *\n");
 583                 }
 584
 585                 List<Paragraph> paras = new ArrayList<Paragraph>();
 586                 InputStream in = new ByteArrayInputStream(content.getBytes("UTF-8"));
 587                 try {
 588                         BufferedReader buff = new BufferedReader(new InputStreamReader(in,
 589                                         "UTF-8"));
 590
 591                         for (String encodedLine = buff.readLine(); encodedLine != null; encodedLine = buff
 592                                         .readLine()) {
 593                                 String lines[];
 594                                 if (isHtml()) {
 595                                         lines = encodedLine.split("(<p>|</p>|<br>|<br/>|\\n)");
 596                                 } else {
 597                                         lines = new String[] { encodedLine };
 598                                 }
 599
 600                                 for (String aline : lines) {
 601                                         String line = aline.trim();
 602
 603                                         URL image = null;
 604                                         if (line.startsWith("[") && line.endsWith("]")) {
 605                                                 image = getImageUrl(this, source,
 606                                                                 line.substring(1, line.length() - 1).trim());
 607                                         }
 608
 609                                         if (image != null) {
 610                                                 paras.add(new Paragraph(image));
 611                                         } else {
 612                                                 paras.add(processPara(line));
 613                                         }
 614                                 }
 615                         }
 616                 } finally {
 617                         in.close();
 618                 }
 619
 620                 // Check quotes for "bad" format
 621                 List<Paragraph> newParas = new ArrayList<Paragraph>();
 622                 for (Paragraph para : paras) {
 623                         newParas.addAll(requotify(para));
 624                 }
 625                 paras = newParas;
 626
 627                 // Remove double blanks/brks
 628                 fixBlanksBreaks(paras);
 629
 630                 return paras;
 631         }
 632
 633         /**
 634          * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
 635          * those {@link Paragraph}s.
 636          * <p>
 637          * The resulting list will not contain a starting or trailing blank/break
 638          * nor 2 blanks or breaks following each other.
 639          *
 640          * @param paras
 641          *            the list of {@link Paragraph}s to fix
 642          */
 643         protected void fixBlanksBreaks(List<Paragraph> paras) {
 644                 boolean space = false;
 645                 boolean brk = true;
 646                 for (int i = 0; i < paras.size(); i++) {
 647                         Paragraph para = paras.get(i);
 648                         boolean thisSpace = para.getType() == ParagraphType.BLANK;
 649                         boolean thisBrk = para.getType() == ParagraphType.BREAK;
 650
 651                         if (i > 0 && space && thisBrk) {
 652                                 paras.remove(i - 1);
 653                                 i--;
 654                         } else if ((space || brk) && (thisSpace || thisBrk)) {
 655                                 paras.remove(i);
 656                                 i--;
 657                         }
 658
 659                         space = thisSpace;
 660                         brk = thisBrk;
 661                 }
 662
 663                 // Remove blank/brk at start
 664                 if (paras.size() > 0
 665                                 && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
 666                                                 0).getType() == ParagraphType.BREAK)) {
 667                         paras.remove(0);
 668                 }
 669
 670                 // Remove blank/brk at end
 671                 int last = paras.size() - 1;
 672                 if (paras.size() > 0
 673                                 && (paras.get(last).getType() == ParagraphType.BLANK || paras
 674                                                 .get(last).getType() == ParagraphType.BREAK)) {
 675                         paras.remove(last);
 676                 }
 677         }
 678
 679         /**
 680          * Get the default cover related to this subject (see <tt>.info</tt> files).
 681          *
 682          * @param subject
 683          *            the subject
 684          *
 685          * @return the cover if any, or NULL
 686          */
 687         static BufferedImage getDefaultCover(String subject) {
 688                 if (subject != null && !subject.isEmpty()
 689                                 && Instance.getCoverDir() != null) {
 690                         try {
 691                                 File fileCover = new File(Instance.getCoverDir(), subject);
 692                                 return getImage(null, fileCover.toURI().toURL(), subject);
 693                         } catch (MalformedURLException e) {
 694                         }
 695                 }
 696
 697                 return null;
 698         }
 699
 700         /**
 701          * Return the list of supported image extensions.
 702          *
 703          * @param emptyAllowed
 704          *            TRUE to allow an empty extension on first place, which can be
 705          *            used when you may already have an extension in your input but
 706          *            are not sure about it
 707          *
 708          * @return the extensions
 709          */
 710         static String[] getImageExt(boolean emptyAllowed) {
 711                 if (emptyAllowed) {
 712                         return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 713                 } else {
 714                         return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 715                 }
 716         }
 717
 718         /**
 719          * Check if the given resource can be a local image or a remote image, then
 720          * refresh the cache with it if it is.
 721          *
 722          * @param source
 723          *            the story source
 724          * @param line
 725          *            the resource to check
 726          *
 727          * @return the image if found, or NULL
 728          *
 729          */
 730         static BufferedImage getImage(BasicSupport support, URL source, String line) {
 731                 URL url = getImageUrl(support, source, line);
 732                 if (url != null) {
 733                         InputStream in = null;
 734                         try {
 735                                 in = Instance.getCache().open(url, getSupport(url), true);
 736                                 return IOUtils.toImage(in);
 737                         } catch (IOException e) {
 738                         } finally {
 739                                 if (in != null) {
 740                                         try {
 741                                                 in.close();
 742                                         } catch (IOException e) {
 743                                         }
 744                                 }
 745                         }
 746                 }
 747
 748                 return null;
 749         }
 750
 751         /**
 752          * Check if the given resource can be a local image or a remote image, then
 753          * refresh the cache with it if it is.
 754          *
 755          * @param source
 756          *            the story source
 757          * @param line
 758          *            the resource to check
 759          *
 760          * @return the image URL if found, or NULL
 761          *
 762          */
 763         static URL getImageUrl(BasicSupport support, URL source, String line) {
 764                 URL url = null;
 765
 766                 if (line != null) {
 767                         // try for files
 768                         String path = null;
 769                         if (source != null) {
 770                                 path = new File(source.getFile()).getParent();
 771                                 try {
 772                                         String basePath = new File(new File(path), line.trim())
 773                                                         .getAbsolutePath();
 774                                         for (String ext : getImageExt(true)) {
 775                                                 if (new File(basePath + ext).exists()) {
 776                                                         url = new File(basePath + ext).toURI().toURL();
 777                                                 }
 778                                         }
 779                                 } catch (Exception e) {
 780                                         // Nothing to do here
 781                                 }
 782                         }
 783
 784                         if (url == null) {
 785                                 // try for URLs
 786                                 try {
 787                                         for (String ext : getImageExt(true)) {
 788                                                 if (Instance.getCache().check(new URL(line + ext))) {
 789                                                         url = new URL(line + ext);
 790                                                         break;
 791                                                 }
 792                                         }
 793
 794                                         // try out of cache
 795                                         if (url == null) {
 796                                                 for (String ext : getImageExt(true)) {
 797                                                         try {
 798                                                                 url = new URL(line + ext);
 799                                                                 Instance.getCache().refresh(url, support, true);
 800                                                                 break;
 801                                                         } catch (IOException e) {
 802                                                                 // no image with this ext
 803                                                                 url = null;
 804                                                         }
 805                                                 }
 806                                         }
 807                                 } catch (MalformedURLException e) {
 808                                         // Not an url
 809                                 }
 810                         }
 811
 812                         // refresh the cached file
 813                         if (url != null) {
 814                                 try {
 815                                         Instance.getCache().refresh(url, support, true);
 816                                 } catch (IOException e) {
 817                                         // woops, broken image
 818                                         url = null;
 819                                 }
 820                         }
 821                 }
 822
 823                 return url;
 824         }
 825
 826         /**
 827          * Open the input file that will be used through the support.
 828          *
 829          * @param source
 830          *            the source {@link URL}
 831          *
 832          * @return the {@link InputStream}
 833          *
 834          * @throws IOException
 835          *             in case of I/O error
 836          */
 837         protected InputStream openInput(URL source) throws IOException {
 838                 return Instance.getCache().open(source, this, false);
 839         }
 840
 841         /**
 842          * Reset the given {@link InputStream} and return it.
 843          *
 844          * @param in
 845          *            the {@link InputStream} to reset
 846          *
 847          * @return the same {@link InputStream} after reset
 848          */
 849         protected InputStream reset(InputStream in) {
 850                 try {
 851                         in.reset();
 852                 } catch (IOException e) {
 853                 }
 854                 return in;
 855         }
 856
 857         /**
 858          * Reset then return {@link BasicSupport#in}.
 859          *
 860          * @return {@link BasicSupport#in}
 861          */
 862         protected InputStream getInput() {
 863                 return reset(in);
 864         }
 865
 866         /**
 867          * Fix the author name if it is prefixed with some "by" {@link String}.
 868          *
 869          * @param author
 870          *            the author with a possible prefix
 871          *
 872          * @return the author without prefixes
 873          */
 874         protected String fixAuthor(String author) {
 875                 if (author != null) {
 876                         for (String suffix : new String[] { " ", ":" }) {
 877                                 for (String byString : Instance.getConfig()
 878                                                 .getString(Config.BYS).split(",")) {
 879                                         byString += suffix;
 880                                         if (author.toUpperCase().startsWith(byString.toUpperCase())) {
 881                                                 author = author.substring(byString.length()).trim();
 882                                         }
 883                                 }
 884                         }
 885
 886                         // Special case (without suffix):
 887                         if (author.startsWith("©")) {
 888                                 author = author.substring(1);
 889                         }
 890                 }
 891
 892                 return author;
 893         }
 894
 895         /**
 896          * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
 897          * and requotify them (i.e., separate them into QUOTE paragraphs and other
 898          * paragraphs (quotes or not)).
 899          *
 900          * @param para
 901          *            the paragraph to requotify (not necessarily a quote)
 902          *
 903          * @return the correctly (or so we hope) quotified paragraphs
 904          */
 905         protected List<Paragraph> requotify(Paragraph para) {
 906                 List<Paragraph> newParas = new ArrayList<Paragraph>();
 907
 908                 if (para.getType() == ParagraphType.QUOTE
 909                                 && para.getContent().length() > 2) {
 910                         String line = para.getContent();
 911                         boolean singleQ = line.startsWith("" + openQuote);
 912                         boolean doubleQ = line.startsWith("" + openDoubleQuote);
 913
 914                         // Do not try when more than one quote at a time
 915                         // (some stories are not easily readable if we do)
 916                         if (singleQ
 917                                         && line.indexOf(closeQuote, 1) < line
 918                                                         .lastIndexOf(closeQuote)) {
 919                                 newParas.add(para);
 920                                 return newParas;
 921                         }
 922                         if (doubleQ
 923                                         && line.indexOf(closeDoubleQuote, 1) < line
 924                                                         .lastIndexOf(closeDoubleQuote)) {
 925                                 newParas.add(para);
 926                                 return newParas;
 927                         }
 928                         //
 929
 930                         if (!singleQ && !doubleQ) {
 931                                 line = openDoubleQuote + line + closeDoubleQuote;
 932                                 newParas.add(new Paragraph(ParagraphType.QUOTE, line));
 933                         } else {
 934                                 char open = singleQ ? openQuote : openDoubleQuote;
 935                                 char close = singleQ ? closeQuote : closeDoubleQuote;
 936
 937                                 int posDot = -1;
 938                                 boolean inQuote = false;
 939                                 int i = 0;
 940                                 for (char car : line.toCharArray()) {
 941                                         if (car == open) {
 942                                                 inQuote = true;
 943                                         } else if (car == close) {
 944                                                 inQuote = false;
 945                                         } else if (car == '.' && !inQuote) {
 946                                                 posDot = i;
 947                                                 break;
 948                                         }
 949                                         i++;
 950                                 }
 951
 952                                 if (posDot >= 0) {
 953                                         String rest = line.substring(posDot + 1).trim();
 954                                         line = line.substring(0, posDot + 1).trim();
 955                                         newParas.add(new Paragraph(ParagraphType.QUOTE, line));
 956                                         if (!rest.isEmpty()) {
 957                                                 newParas.addAll(requotify(processPara(rest)));
 958                                         }
 959                                 } else {
 960                                         newParas.add(para);
 961                                 }
 962                         }
 963                 } else {
 964                         newParas.add(para);
 965                 }
 966
 967                 return newParas;
 968         }
 969
 970         /**
 971          * Process a {@link Paragraph} from a raw line of text.
 972          * <p>
 973          * Will also fix quotes and HTML encoding if needed.
 974          *
 975          * @param line
 976          *            the raw line
 977          *
 978          * @return the processed {@link Paragraph}
 979          */
 980         protected Paragraph processPara(String line) {
 981                 line = ifUnhtml(line).trim();
 982
 983                 boolean space = true;
 984                 boolean brk = true;
 985                 boolean quote = false;
 986                 boolean tentativeCloseQuote = false;
 987                 char prev = '\0';
 988                 int dashCount = 0;
 989
 990                 StringBuilder builder = new StringBuilder();
 991                 for (char car : line.toCharArray()) {
 992                         if (car != '-') {
 993                                 if (dashCount > 0) {
 994                                         // dash, ndash and mdash: - – —
 995                                         // currently: always use mdash
 996                                         builder.append(dashCount == 1 ? '-' : '—');
 997                                 }
 998                                 dashCount = 0;
 999                         }
1000
1001                         if (tentativeCloseQuote) {
1002                                 tentativeCloseQuote = false;
1003                                 if (Character.isLetterOrDigit(car)) {
1004                                         builder.append("'");
1005                                 } else {
1006                                         // handle double-single quotes as double quotes
1007                                         if (prev == car) {
1008                                                 builder.append(closeDoubleQuote);
1009                                                 continue;
1010                                         } else {
1011                                                 builder.append(closeQuote);
1012                                         }
1013                                 }
1014                         }
1015
1016                         switch (car) {
1017                         case ' ': // note: unbreakable space
1018                         case ' ':
1019                         case '\t':
1020                         case '\n': // just in case
1021                         case '\r': // just in case
1022                                 builder.append(' ');
1023                                 break;
1024
1025                         case '\'':
1026                                 if (space || (brk && quote)) {
1027                                         quote = true;
1028                                         // handle double-single quotes as double quotes
1029                                         if (prev == car) {
1030                                                 builder.deleteCharAt(builder.length() - 1);
1031                                                 builder.append(openDoubleQuote);
1032                                         } else {
1033                                                 builder.append(openQuote);
1034                                         }
1035                                 } else if (prev == ' ' || prev == car) {
1036                                         // handle double-single quotes as double quotes
1037                                         if (prev == car) {
1038                                                 builder.deleteCharAt(builder.length() - 1);
1039                                                 builder.append(openDoubleQuote);
1040                                         } else {
1041                                                 builder.append(openQuote);
1042                                         }
1043                                 } else {
1044                                         // it is a quote ("I'm off") or a 'quote' ("This
1045                                         // 'good' restaurant"...)
1046                                         tentativeCloseQuote = true;
1047                                 }
1048                                 break;
1049
1050                         case '"':
1051                                 if (space || (brk && quote)) {
1052                                         quote = true;
1053                                         builder.append(openDoubleQuote);
1054                                 } else if (prev == ' ') {
1055                                         builder.append(openDoubleQuote);
1056                                 } else {
1057                                         builder.append(closeDoubleQuote);
1058                                 }
1059                                 break;
1060
1061                         case '-':
1062                                 if (space) {
1063                                         quote = true;
1064                                 } else {
1065                                         dashCount++;
1066                                 }
1067                                 space = false;
1068                                 break;
1069
1070                         case '*':
1071                         case '~':
1072                         case '/':
1073                         case '\\':
1074                         case '<':
1075                         case '>':
1076                         case '=':
1077                         case '+':
1078                         case '_':
1079                         case '–':
1080                         case '—':
1081                                 space = false;
1082                                 builder.append(car);
1083                                 break;
1084
1085                         case '‘':
1086                         case '`':
1087                         case '‹':
1088                         case '﹁':
1089                         case '〈':
1090                         case '「':
1091                                 if (space || (brk && quote)) {
1092                                         quote = true;
1093                                         builder.append(openQuote);
1094                                 } else {
1095                                         // handle double-single quotes as double quotes
1096                                         if (prev == car) {
1097                                                 builder.deleteCharAt(builder.length() - 1);
1098                                                 builder.append(openDoubleQuote);
1099                                         } else {
1100                                                 builder.append(openQuote);
1101                                         }
1102                                 }
1103                                 space = false;
1104                                 brk = false;
1105                                 break;
1106
1107                         case '’':
1108                         case '›':
1109                         case '﹂':
1110                         case '〉':
1111                         case '」':
1112                                 space = false;
1113                                 brk = false;
1114                                 // handle double-single quotes as double quotes
1115                                 if (prev == car) {
1116                                         builder.deleteCharAt(builder.length() - 1);
1117                                         builder.append(closeDoubleQuote);
1118                                 } else {
1119                                         builder.append(closeQuote);
1120                                 }
1121                                 break;
1122
1123                         case '«':
1124                         case '“':
1125                         case '﹃':
1126                         case '《':
1127                         case '『':
1128                                 if (space || (brk && quote)) {
1129                                         quote = true;
1130                                         builder.append(openDoubleQuote);
1131                                 } else {
1132                                         builder.append(openDoubleQuote);
1133                                 }
1134                                 space = false;
1135                                 brk = false;
1136                                 break;
1137
1138                         case '»':
1139                         case '”':
1140                         case '﹄':
1141                         case '》':
1142                         case '』':
1143                                 space = false;
1144                                 brk = false;
1145                                 builder.append(closeDoubleQuote);
1146                                 break;
1147
1148                         default:
1149                                 space = false;
1150                                 brk = false;
1151                                 builder.append(car);
1152                                 break;
1153                         }
1154
1155                         prev = car;
1156                 }
1157
1158                 if (tentativeCloseQuote) {
1159                         tentativeCloseQuote = false;
1160                         builder.append(closeQuote);
1161                 }
1162
1163                 line = builder.toString().trim();
1164
1165                 ParagraphType type = ParagraphType.NORMAL;
1166                 if (space) {
1167                         type = ParagraphType.BLANK;
1168                 } else if (brk) {
1169                         type = ParagraphType.BREAK;
1170                 } else if (quote) {
1171                         type = ParagraphType.QUOTE;
1172                 }
1173
1174                 return new Paragraph(type, line);
1175         }
1176
1177         /**
1178          * Remove the HTML from the input <b>if</b> {@link BasicSupport#isHtml()} is
1179          * true.
1180          *
1181          * @param input
1182          *            the input
1183          *
1184          * @return the no html version if needed
1185          */
1186         private String ifUnhtml(String input) {
1187                 if (isHtml() && input != null) {
1188                         return StringUtils.unhtml(input);
1189                 }
1190
1191                 return input;
1192         }
1193
1194         /**
1195          * Return a {@link BasicSupport} implementation supporting the given
1196          * resource if possible.
1197          *
1198          * @param url
1199          *            the story resource
1200          *
1201          * @return an implementation that supports it, or NULL
1202          */
1203         public static BasicSupport getSupport(URL url) {
1204                 if (url == null) {
1205                         return null;
1206                 }
1207
1208                 // TEXT and INFO_TEXT always support files (not URLs though)
1209                 for (SupportType type : SupportType.values()) {
1210                         if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1211                                 BasicSupport support = getSupport(type);
1212                                 if (support != null && support.supports(url)) {
1213                                         return support;
1214                                 }
1215                         }
1216                 }
1217
1218                 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
1219                                 SupportType.TEXT }) {
1220                         BasicSupport support = getSupport(type);
1221                         if (support != null && support.supports(url)) {
1222                                 return support;
1223                         }
1224                 }
1225
1226                 return null;
1227         }
1228
1229         /**
1230          * Return a {@link BasicSupport} implementation supporting the given type.
1231          *
1232          * @param type
1233          *            the type
1234          *
1235          * @return an implementation that supports it, or NULL
1236          */
1237         public static BasicSupport getSupport(SupportType type) {
1238                 switch (type) {
1239                 case EPUB:
1240                         return new Epub().setType(type);
1241                 case INFO_TEXT:
1242                         return new InfoText().setType(type);
1243                 case FIMFICTION:
1244                         return new Fimfiction().setType(type);
1245                 case FANFICTION:
1246                         return new Fanfiction().setType(type);
1247                 case TEXT:
1248                         return new Text().setType(type);
1249                 case MANGAFOX:
1250                         return new MangaFox().setType(type);
1251                 case E621:
1252                         return new E621().setType(type);
1253                 case YIFFSTAR:
1254                         return new YiffStar().setType(type);
1255                 case CBZ:
1256                         return new Cbz().setType(type);
1257                 case HTML:
1258                         return new Html().setType(type);
1259                 }
1260
1261                 return null;
1262         }
1263
1264         /**
1265          * Return the first line from the given input which correspond to the given
1266          * selectors.
1267          *
1268          * @param in
1269          *            the input
1270          * @param needle
1271          *            a string that must be found inside the target line (also
1272          *            supports "^" at start to say "only if it starts with" the
1273          *            needle)
1274          * @param relativeLine
1275          *            the line to return based upon the target line position (-1 =
1276          *            the line before, 0 = the target line...)
1277          *
1278          * @return the line
1279          */
1280         static String getLine(InputStream in, String needle, int relativeLine) {
1281                 return getLine(in, needle, relativeLine, true);
1282         }
1283
1284         /**
1285          * Return a line from the given input which correspond to the given
1286          * selectors.
1287          *
1288          * @param in
1289          *            the input
1290          * @param needle
1291          *            a string that must be found inside the target line (also
1292          *            supports "^" at start to say "only if it starts with" the
1293          *            needle)
1294          * @param relativeLine
1295          *            the line to return based upon the target line position (-1 =
1296          *            the line before, 0 = the target line...)
1297          * @param first
1298          *            takes the first result (as opposed to the last one, which will
1299          *            also always spend the input)
1300          *
1301          * @return the line
1302          */
1303         static String getLine(InputStream in, String needle, int relativeLine,
1304                         boolean first) {
1305                 String rep = null;
1306
1307                 try {
1308                         in.reset();
1309                 } catch (IOException e) {
1310                         Instance.syserr(e);
1311                 }
1312
1313                 List<String> lines = new ArrayList<String>();
1314                 @SuppressWarnings("resource")
1315                 Scanner scan = new Scanner(in, "UTF-8");
1316                 int index = -1;
1317                 scan.useDelimiter("\\n");
1318                 while (scan.hasNext()) {
1319                         lines.add(scan.next());
1320
1321                         if (index == -1) {
1322                                 if (needle.startsWith("^")) {
1323                                         if (lines.get(lines.size() - 1).startsWith(
1324                                                         needle.substring(1))) {
1325                                                 index = lines.size() - 1;
1326                                         }
1327
1328                                 } else {
1329                                         if (lines.get(lines.size() - 1).contains(needle)) {
1330                                                 index = lines.size() - 1;
1331                                         }
1332                                 }
1333                         }
1334
1335                         if (index >= 0 && index + relativeLine < lines.size()) {
1336                                 rep = lines.get(index + relativeLine);
1337                                 if (first) {
1338                                         break;
1339                                 }
1340                         }
1341                 }
1342
1343                 return rep;
1344         }
1345 }