src/be/nikiroo/fanfix/supported/BasicSupport.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.awt.image.BufferedImage;
   4 import java.io.BufferedReader;
   5 import java.io.ByteArrayInputStream;
   6 import java.io.File;
   7 import java.io.IOException;
   8 import java.io.InputStream;
   9 import java.io.InputStreamReader;
  10 import java.net.MalformedURLException;
  11 import java.net.URL;
  12 import java.util.ArrayList;
  13 import java.util.Date;
  14 import java.util.HashMap;
  15 import java.util.List;
  16 import java.util.Map;
  17 import java.util.Map.Entry;
  18 import java.util.Scanner;
  19
  20 import be.nikiroo.fanfix.Instance;
  21 import be.nikiroo.fanfix.bundles.Config;
  22 import be.nikiroo.fanfix.bundles.StringId;
  23 import be.nikiroo.fanfix.data.Chapter;
  24 import be.nikiroo.fanfix.data.MetaData;
  25 import be.nikiroo.fanfix.data.Paragraph;
  26 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
  27 import be.nikiroo.fanfix.data.Story;
  28 import be.nikiroo.utils.IOUtils;
  29 import be.nikiroo.utils.Progress;
  30 import be.nikiroo.utils.StringUtils;
  31
  32 /**
  33  * This class is the base class used by the other support classes. It can be
  34  * used outside of this package, and have static method that you can use to get
  35  * access to the correct support class.
  36  * <p>
  37  * It will be used with 'resources' (usually web pages or files).
  38  *
  39  * @author niki
  40  */
  41 public abstract class BasicSupport {
  42         /**
  43          * The supported input types for which we can get a {@link BasicSupport}
  44          * object.
  45          *
  46          * @author niki
  47          */
  48         public enum SupportType {
  49                 /** EPUB files created with this program */
  50                 EPUB,
  51                 /** Pure text file with some rules */
  52                 TEXT,
  53                 /** TEXT but with associated .info file */
  54                 INFO_TEXT,
  55                 /** My Little Pony fanfictions */
  56                 FIMFICTION,
  57                 /** Fanfictions from a lot of different universes */
  58                 FANFICTION,
  59                 /** Website with lots of Mangas */
  60                 MANGAFOX,
  61                 /** Furry website with comics support */
  62                 E621,
  63                 /** Furry website with stories */
  64                 YIFFSTAR,
  65                 /** CBZ files */
  66                 CBZ,
  67                 /** HTML files */
  68                 HTML;
  69
  70                 /**
  71                  * A description of this support type (more information than the
  72                  * {@link BasicSupport#getSourceName()}).
  73                  *
  74                  * @return the description
  75                  */
  76                 public String getDesc() {
  77                         String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
  78                                         this.name());
  79
  80                         if (desc == null) {
  81                                 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
  82                         }
  83
  84                         return desc;
  85                 }
  86
  87                 /**
  88                  * The name of this support type (a short version).
  89                  *
  90                  * @return the name
  91                  */
  92                 public String getSourceName() {
  93                         BasicSupport support = BasicSupport.getSupport(this);
  94                         if (support != null) {
  95                                 return support.getSourceName();
  96                         }
  97
  98                         return null;
  99                 }
 100
 101                 @Override
 102                 public String toString() {
 103                         return super.toString().toLowerCase();
 104                 }
 105
 106                 /**
 107                  * Call {@link SupportType#valueOf(String.toUpperCase())}.
 108                  *
 109                  * @param typeName
 110                  *            the possible type name
 111                  *
 112                  * @return NULL or the type
 113                  */
 114                 public static SupportType valueOfUC(String typeName) {
 115                         return SupportType.valueOf(typeName == null ? null : typeName
 116                                         .toUpperCase());
 117                 }
 118
 119                 /**
 120                  * Call {@link SupportType#valueOf(String.toUpperCase())} but return
 121                  * NULL for NULL instead of raising exception.
 122                  *
 123                  * @param typeName
 124                  *            the possible type name
 125                  *
 126                  * @return NULL or the type
 127                  */
 128                 public static SupportType valueOfNullOkUC(String typeName) {
 129                         if (typeName == null) {
 130                                 return null;
 131                         }
 132
 133                         return SupportType.valueOfUC(typeName);
 134                 }
 135
 136                 /**
 137                  * Call {@link SupportType#valueOf(String.toUpperCase())} but return
 138                  * NULL in case of error instead of raising an exception.
 139                  *
 140                  * @param typeName
 141                  *            the possible type name
 142                  *
 143                  * @return NULL or the type
 144                  */
 145                 public static SupportType valueOfAllOkUC(String typeName) {
 146                         try {
 147                                 return SupportType.valueOfUC(typeName);
 148                         } catch (Exception e) {
 149                                 return null;
 150                         }
 151                 }
 152         }
 153
             // Main resource stream: assigned by processMeta(URL, boolean, boolean)
             // and closed by process()/processMeta() once the work is done
 154         private InputStream in;
             // Support type, see getType()/setType(SupportType)
 155         private SupportType type;
 156         private URL currentReferer; // with only one 'r', as in 'HTTP'...
 157
 158         // quote chars, read from the translation resources when the instance is created
 159         private char openQuote = Instance.getTrans().getChar(
 160                         StringId.OPEN_SINGLE_QUOTE);
 161         private char closeQuote = Instance.getTrans().getChar(
 162                         StringId.CLOSE_SINGLE_QUOTE);
 163         private char openDoubleQuote = Instance.getTrans().getChar(
 164                         StringId.OPEN_DOUBLE_QUOTE);
 165         private char closeDoubleQuote = Instance.getTrans().getChar(
 166                         StringId.CLOSE_DOUBLE_QUOTE);
 167
 168         /**
 169          * The name of this support class.
              * <p>
              * {@link SupportType#getSourceName()} returns this value for the support
              * associated to a given type.
 170          *
 171          * @return the name
 172          */
 173         protected abstract String getSourceName();
 174
 175         /**
 176          * Check if the given resource is supported by this {@link BasicSupport}.
              * <p>
              * NOTE(review): presumably used when looking for the support matching a
              * {@link URL}; the caller is not in this part of the file -- confirm.
 177          *
 178          * @param url
 179          *            the resource to check for
 180          *
 181          * @return TRUE if it is
 182          */
 183         protected abstract boolean supports(URL url);
 184
 185         /**
 186          * Return TRUE if the support will return HTML encoded content values for
 187          * the chapters content.
              * <p>
              * {@link BasicSupport#makeParagraphs(URL, String)} uses this value to choose
              * between splitting the content on HTML tags and reading it line by line.
 188          *
 189          * @return TRUE for HTML
 190          */
 191         protected abstract boolean isHtml();
 192
             /**
              * Return the metadata of the story.
              * <p>
              * {@link BasicSupport#processMeta(URL, boolean, boolean)} stores the result
              * in the {@link Story}, after filling in the creation date when it is NULL
              * or empty and the default cover of the subject when no cover is set.
              *
              * @param source
              *            the source of the story
              * @param in
              *            the input (the main resource)
              *
              * @return the metadata
              *
              * @throws IOException
              *             in case of I/O error
              */
 193         protected abstract MetaData getMeta(URL source, InputStream in)
 194                         throws IOException;
 195
 196         /**
 197          * Return the story description.
              * <p>
              * When the description is requested,
              * {@link BasicSupport#processMeta(URL, boolean, boolean)} converts the
              * returned text into the chapter number 0 and stores it as the resume of
              * the {@link MetaData}.
 198          *
 199          * @param source
 200          *            the source of the story
 201          * @param in
 202          *            the input (the main resource)
 203          *
 204          * @return the description
 205          *
 206          * @throws IOException
 207          *             in case of I/O error
 208          */
 209         protected abstract String getDesc(URL source, InputStream in)
 210                         throws IOException;
 211
 212         /**
 213          * Return the list of chapters (name and resource).
              * <p>
              * For each entry, {@link BasicSupport#process(URL, Progress)} uses the key
              * as the chapter name and opens the value to read the chapter content.
 214          *
 215          * @param source
 216          *            the source of the story
 217          * @param in
 218          *            the input (the main resource)
 219          *
 220          * @return the chapters (NULL is accepted by the caller and means no chapters)
 221          *
 222          * @throws IOException
 223          *             in case of I/O error
 224          */
 225         protected abstract List<Entry<String, URL>> getChapters(URL source,
 226                         InputStream in) throws IOException;
 227
 228         /**
 229          * Return the content of the chapter (possibly HTML encoded, if
 230          * {@link BasicSupport#isHtml()} is TRUE).
 231          *
 232          * @param source
 233          *            the source of the story
 234          * @param in
 235          *            the input ({@link BasicSupport#process(URL, Progress)} passes the
              *            stream opened on the resource of this chapter)
 236          * @param number
 237          *            the chapter number (the caller starts counting at 1)
 238          *
 239          * @return the content
 240          *
 241          * @throws IOException
 242          *             in case of I/O error
 243          */
 244         protected abstract String getChapterContent(URL source, InputStream in,
 245                         int number) throws IOException;
 246
 247         /**
 248          * Log into the support (can be a no-op depending upon the support).
              * <p>
              * This is the first step of
              * {@link BasicSupport#processMeta(URL, boolean, boolean)}; this default
              * implementation does nothing.
 249          *
 250          * @throws IOException
 251          *             in case of I/O error
 252          */
 253         public void login() throws IOException {
 254
                     // no login required by default
 255         }
 256
 257         /**
 258          * Return the list of cookies (values included) that must be used to
 259          * correctly fetch the resources.
 260          * <p>
 261          * You are expected to call the super method implementation if you override
 262          * it.
 263          *
 264          * @return the cookies
 265          *
 266          * @throws IOException
 267          *             in case of I/O error
 268          */
 269         public Map<String, String> getCookies() throws IOException {
 270                 return new HashMap<String, String>();
 271         }
 272
 273         /**
 274          * Return the canonical form of the main {@link URL}.
              * <p>
              * This default implementation returns the given {@link URL} unchanged.
 275          *
 276          * @param source
 277          *            the source {@link URL}
 278          *
 279          * @return the canonical form of this {@link URL}
 280          *
 281          * @throws IOException
 282          *             in case of I/O error
 283          */
 284         public URL getCanonicalUrl(URL source) throws IOException {
 285                 return source;
 286         }
 287
 288         /**
 289          * Process the given story resource into a partially filled {@link Story}
 290          * object containing the name and metadata, except for the description.
              * <p>
              * The resources are closed when done (this is the 3-parameter version
              * called with <tt>close</tt> set to TRUE and <tt>getDesc</tt> to FALSE).
 291          *
 292          * @param url
 293          *            the story resource
 294          *
 295          * @return the {@link Story} (NULL when the input cannot be opened)
 296          *
 297          * @throws IOException
 298          *             in case of I/O error
 299          */
 300         public Story processMeta(URL url) throws IOException {
 301                 return processMeta(url, true, false);
 302         }
 303
 304         /**
 305          * Process the given story resource into a partially filled {@link Story}
 306          * object containing the name and metadata.
 307          *
 308          * @param url
 309          *            the story resource
 310          *
 311          * @param close
 312          *            close "this" and "in" when done
 313          *
 314          * @return the {@link Story}
 315          *
 316          * @throws IOException
 317          *             in case of I/O error
 318          */
 319         protected Story processMeta(URL url, boolean close, boolean getDesc)
 320                         throws IOException {
 321                 login();
 322
 323                 url = getCanonicalUrl(url);
 324
 325                 setCurrentReferer(url);
 326
 327                 in = openInput(url);
 328                 if (in == null) {
 329                         return null;
 330                 }
 331
 332                 try {
 333                         preprocess(url, getInput());
 334
 335                         Story story = new Story();
 336                         MetaData meta = getMeta(url, getInput());
 337                         if (meta.getCreationDate() == null
 338                                         || meta.getCreationDate().isEmpty()) {
 339                                 meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
 340                         }
 341                         story.setMeta(meta);
 342
 343                         if (meta != null && meta.getCover() == null) {
 344                                 meta.setCover(getDefaultCover(meta.getSubject()));
 345                         }
 346
 347                         if (getDesc) {
 348                                 String descChapterName = Instance.getTrans().getString(
 349                                                 StringId.DESCRIPTION);
 350                                 story.getMeta().setResume(
 351                                                 makeChapter(url, 0, descChapterName,
 352                                                                 getDesc(url, getInput())));
 353                         }
 354
 355                         return story;
 356                 } finally {
 357                         if (close) {
 358                                 try {
 359                                         close();
 360                                 } catch (IOException e) {
 361                                         Instance.syserr(e);
 362                                 }
 363
 364                                 if (in != null) {
 365                                         in.close();
 366                                 }
 367                         }
 368
 369                         setCurrentReferer(null);
 370                 }
 371         }
 372
 373         /**
 374          * Process the given story resource into a fully filled {@link Story}
 375          * object.
 376          *
 377          * @param url
 378          *            the story resource
 379          * @param pg
 380          *            the optional progress reporter
 381          *
 382          * @return the {@link Story}
 383          *
 384          * @throws IOException
 385          *             in case of I/O error
 386          */
 387         public Story process(URL url, Progress pg) throws IOException {
 388                 if (pg == null) {
 389                         pg = new Progress();
 390                 } else {
 391                         pg.setMinMax(0, 100);
 392                 }
 393
 394                 url = getCanonicalUrl(url);
 395                 pg.setProgress(1);
 396                 try {
 397                         Story story = processMeta(url, false, true);
 398                         pg.setProgress(10);
 399                         if (story == null) {
 400                                 pg.setProgress(100);
 401                                 return null;
 402                         }
 403
 404                         pg.setName("Retrieving " + story.getMeta().getTitle());
 405
 406                         setCurrentReferer(url);
 407
 408                         story.setChapters(new ArrayList<Chapter>());
 409
 410                         List<Entry<String, URL>> chapters = getChapters(url, getInput());
 411                         pg.setProgress(20);
 412
 413                         int i = 1;
 414                         if (chapters != null) {
 415                                 Progress pgChaps = new Progress(0, chapters.size());
 416                                 pg.addProgress(pgChaps, 80);
 417
 418                                 long words = 0;
 419                                 for (Entry<String, URL> chap : chapters) {
 420                                         setCurrentReferer(chap.getValue());
 421                                         InputStream chapIn = Instance.getCache().open(
 422                                                         chap.getValue(), this, true);
 423                                         try {
 424                                                 Chapter cc = makeChapter(url, i, chap.getKey(),
 425                                                                 getChapterContent(url, chapIn, i));
 426                                                 words += cc.getWords();
 427                                                 story.getChapters().add(cc);
 428                                                 if (story.getMeta() != null) {
 429                                                         story.getMeta().setWords(words);
 430                                                 }
 431                                         } finally {
 432                                                 chapIn.close();
 433                                         }
 434
 435                                         pgChaps.setProgress(i++);
 436                                 }
 437                         } else {
 438                                 pg.setProgress(100);
 439                         }
 440
 441                         return story;
 442
 443                 } finally {
 444                         try {
 445                                 close();
 446                         } catch (IOException e) {
 447                                 Instance.syserr(e);
 448                         }
 449
 450                         if (in != null) {
 451                                 in.close();
 452                         }
 453
 454                         setCurrentReferer(null);
 455                 }
 456         }
 457
 458         /**
 459          * The support type.
 460          *
 461          * @return the type (as last given to {@link BasicSupport#setType(SupportType)})
 462          */
 463         public SupportType getType() {
 464                 return type;
 465         }
 466
 467         /**
 468          * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
 469          * the current {@link URL} we work on.
              * <p>
              * It is reset to NULL at the end of the processing methods of this class.
 470          *
 471          * @return the referer
 472          */
 473         public URL getCurrentReferer() {
 474                 return currentReferer;
 475         }
 476
 477         /**
 478          * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
 479          * the current {@link URL} we work on.
 480          *
 481          * @param currentReferer
 482          *            the new referer (can be NULL)
 483          */
 484         protected void setCurrentReferer(URL currentReferer) {
 485                 this.currentReferer = currentReferer;
 486         }
 487
 488         /**
 489          * The support type.
 490          *
 491          * @param type
 492          *            the new type
 493          *
 494          * @return this (so the call can be chained)
 495          */
 496         protected BasicSupport setType(SupportType type) {
 497                 this.type = type;
 498                 return this;
 499         }
 500
 501         /**
 502          * Prepare the support if needed before processing.
              * <p>
              * Called by {@link BasicSupport#processMeta(URL, boolean, boolean)} once
              * the input is open and before the metadata are read; this default
              * implementation does nothing.
 503          *
 504          * @param source
 505          *            the source of the story
 506          * @param in
 507          *            the input (the main resource)
 508          *
 509          * @throws IOException
 510          *             on I/O error
 511          */
 512         protected void preprocess(URL source, InputStream in) throws IOException {
 513         }
 514
 515         /**
 516          * Now that we have processed the {@link Story}, close the resources if any.
              * <p>
              * This default implementation does nothing; the callers in this class
              * report an {@link IOException} thrown here instead of propagating it.
 517          *
 518          * @throws IOException
 519          *             on I/O error
 520          */
 521         protected void close() throws IOException {
 522         }
 523
 524         /**
 525          * Create a {@link Chapter} object from the given information, formatting
 526          * the content as it should be.
 527          *
 528          * @param number
 529          *            the chapter number
 530          * @param name
 531          *            the chapter name
 532          * @param content
 533          *            the chapter content
 534          *
 535          * @return the {@link Chapter}
 536          *
 537          * @throws IOException
 538          *             in case of I/O error
 539          */
 540         protected Chapter makeChapter(URL source, int number, String name,
 541                         String content) throws IOException {
 542                 // Chapter name: process it correctly, then remove the possible
 543                 // redundant "Chapter x: " in front of it
 544                 String chapterName = processPara(name).getContent().trim();
 545                 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
 546                                 .split(",")) {
 547                         String chapterWord = Instance.getConfig().getStringX(
 548                                         Config.CHAPTER, lang);
 549                         if (chapterName.startsWith(chapterWord)) {
 550                                 chapterName = chapterName.substring(chapterWord.length())
 551                                                 .trim();
 552                                 break;
 553                         }
 554                 }
 555
 556                 if (chapterName.startsWith(Integer.toString(number))) {
 557                         chapterName = chapterName.substring(
 558                                         Integer.toString(number).length()).trim();
 559                 }
 560
 561                 if (chapterName.startsWith(":")) {
 562                         chapterName = chapterName.substring(1).trim();
 563                 }
 564                 //
 565
 566                 Chapter chap = new Chapter(number, chapterName);
 567
 568                 if (content != null) {
 569                         List<Paragraph> paras = makeParagraphs(source, content);
 570                         long words = 0;
 571                         for (Paragraph para : paras) {
 572                                 words += para.getWords();
 573                         }
 574                         chap.setParagraphs(paras);
 575                         chap.setWords(words);
 576                 }
 577
 578                 return chap;
 579
 580         }
 581
 582         /**
 583          * Convert the given content into {@link Paragraph}s.
 584          *
 585          * @param source
 586          *            the source URL of the story
 587          * @param content
 588          *            the textual content
 589          *
 590          * @return the {@link Paragraph}s
 591          *
 592          * @throws IOException
 593          *             in case of I/O error
 594          */
 595         protected List<Paragraph> makeParagraphs(URL source, String content)
 596                         throws IOException {
 597                 if (isHtml()) {
 598                         // Special <HR> processing:
 599                         content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
 600                                         "<br/>* * *<br/>");
 601                 }
 602
 603                 List<Paragraph> paras = new ArrayList<Paragraph>();
 604
 605                 if (content != null && !content.trim().isEmpty()) {
 606                         if (isHtml()) {
 607                                 for (String line : content.split("(<p>|</p>|<br>|<br/>)")) {
 608                                         paras.add(makeParagraph(source, line.trim()));
 609                                 }
 610                         } else {
 611                                 BufferedReader buff = null;
 612                                 try {
 613                                         buff = new BufferedReader(
 614                                                         new InputStreamReader(new ByteArrayInputStream(
 615                                                                         content.getBytes("UTF-8")), "UTF-8"));
 616                                         for (String line = buff.readLine(); line != null; line = buff
 617                                                         .readLine()) {
 618                                                 paras.add(makeParagraph(source, line.trim()));
 619                                         }
 620                                 } finally {
 621                                         if (buff != null) {
 622                                                 buff.close();
 623                                         }
 624                                 }
 625                         }
 626
 627                         // Check quotes for "bad" format
 628                         List<Paragraph> newParas = new ArrayList<Paragraph>();
 629                         for (Paragraph para : paras) {
 630                                 newParas.addAll(requotify(para));
 631                         }
 632                         paras = newParas;
 633
 634                         // Remove double blanks/brks
 635                         fixBlanksBreaks(paras);
 636                 }
 637
 638                 return paras;
 639         }
 640
 641         /**
 642          * Convert the given line into a single {@link Paragraph}.
 643          *
 644          * @param source
 645          *            the source URL of the story
 646          * @param line
 647          *            the textual content of the paragraph
 648          *
 649          * @return the {@link Paragraph}
 650          */
 651         private Paragraph makeParagraph(URL source, String line) {
 652                 URL image = null;
 653                 if (line.startsWith("[") && line.endsWith("]")) {
 654                         image = getImageUrl(this, source,
 655                                         line.substring(1, line.length() - 1).trim());
 656                 }
 657
 658                 if (image != null) {
 659                         return new Paragraph(image);
 660                 } else {
 661                         return processPara(line);
 662                 }
 663         }
 664
 665         /**
 666          * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
 667          * those {@link Paragraph}s.
 668          * <p>
 669          * The resulting list will not contain a starting or trailing blank/break
 670          * nor 2 blanks or breaks following each other.
              * <p>
              * The given list is modified in place.
 671          *
 672          * @param paras
 673          *            the list of {@link Paragraph}s to fix
 674          */
 675         protected void fixBlanksBreaks(List<Paragraph> paras) {
                     // Type of the element seen on the previous iteration; starting with
                     // "brk" set makes a blank/break at index 0 match the second test
 676                 boolean space = false;
 677                 boolean brk = true;
 678                 for (int i = 0; i < paras.size(); i++) {
 679                         Paragraph para = paras.get(i);
 680                         boolean thisSpace = para.getType() == ParagraphType.BLANK;
 681                         boolean thisBrk = para.getType() == ParagraphType.BREAK;
 682
                             // A break right after a blank: the element before it is removed
                             // and the break is kept. Otherwise, a blank/break following a
                             // blank/break is itself removed. In both cases the index is
                             // moved back to account for the removed element.
 683                         if (i > 0 && space && thisBrk) {
 684                                 paras.remove(i - 1);
 685                                 i--;
 686                         } else if ((space || brk) && (thisSpace || thisBrk)) {
 687                                 paras.remove(i);
 688                                 i--;
 689                         }
 690
                             // Updated from the current element even when it was just removed
 691                         space = thisSpace;
 692                         brk = thisBrk;
 693                 }
 694
 695                 // Remove blank/brk at start
 696                 if (paras.size() > 0
 697                                 && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
 698                                                 0).getType() == ParagraphType.BREAK)) {
 699                         paras.remove(0);
 700                 }
 701
 702                 // Remove blank/brk at end
                     // ("last" is -1 for an empty list, hence the size test before its use)
 703                 int last = paras.size() - 1;
 704                 if (paras.size() > 0
 705                                 && (paras.get(last).getType() == ParagraphType.BLANK || paras
 706                                                 .get(last).getType() == ParagraphType.BREAK)) {
 707                         paras.remove(last);
 708                 }
 709         }
 710
 711         /**
 712          * Get the default cover related to this subject (see <tt>.info</tt> files).
 713          *
 714          * @param subject
 715          *            the subject
 716          *
 717          * @return the cover if any, or NULL
 718          */
 719         static BufferedImage getDefaultCover(String subject) {
 720                 if (subject != null && !subject.isEmpty()
 721                                 && Instance.getCoverDir() != null) {
 722                         try {
 723                                 File fileCover = new File(Instance.getCoverDir(), subject);
 724                                 return getImage(null, fileCover.toURI().toURL(), subject);
 725                         } catch (MalformedURLException e) {
 726                         }
 727                 }
 728
 729                 return null;
 730         }
 731
 732         /**
 733          * Return the list of supported image extensions.
 734          *
 735          * @param emptyAllowed
 736          *            TRUE to allow an empty extension on first place, which can be
 737          *            used when you may already have an extension in your input but
 738          *            are not sure about it
 739          *
 740          * @return the extensions
 741          */
 742         static String[] getImageExt(boolean emptyAllowed) {
 743                 if (emptyAllowed) {
 744                         return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 745                 } else {
 746                         return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 747                 }
 748         }
 749
 750         /**
 751          * Check if the given resource can be a local image or a remote image, then
 752          * refresh the cache with it if it is.
 753          *
 754          * @param source
 755          *            the story source
 756          * @param line
 757          *            the resource to check
 758          *
 759          * @return the image if found, or NULL
 760          *
 761          */
 762         static BufferedImage getImage(BasicSupport support, URL source, String line) {
 763                 URL url = getImageUrl(support, source, line);
 764                 if (url != null) {
 765                         InputStream in = null;
 766                         try {
 767                                 in = Instance.getCache().open(url, getSupport(url), true);
 768                                 return IOUtils.toImage(in);
 769                         } catch (IOException e) {
 770                         } finally {
 771                                 if (in != null) {
 772                                         try {
 773                                                 in.close();
 774                                         } catch (IOException e) {
 775                                         }
 776                                 }
 777                         }
 778                 }
 779
 780                 return null;
 781         }
 782
 783         /**
 784          * Check if the given resource can be a local image or a remote image, then
 785          * refresh the cache with it if it is.
 786          *
 787          * @param source
 788          *            the story source
 789          * @param line
 790          *            the resource to check
 791          *
 792          * @return the image URL if found, or NULL
 793          *
 794          */
 795         static URL getImageUrl(BasicSupport support, URL source, String line) {
 796                 URL url = null;
 797
 798                 if (line != null) {
 799                         // try for files
 800                         String path = null;
 801                         if (source != null) {
 802                                 path = new File(source.getFile()).getParent();
 803                                 try {
 804                                         String basePath = new File(new File(path), line.trim())
 805                                                         .getAbsolutePath();
 806                                         for (String ext : getImageExt(true)) {
 807                                                 if (new File(basePath + ext).exists()) {
 808                                                         url = new File(basePath + ext).toURI().toURL();
 809                                                 }
 810                                         }
 811                                 } catch (Exception e) {
 812                                         // Nothing to do here
 813                                 }
 814                         }
 815
 816                         if (url == null) {
 817                                 // try for URLs
 818                                 try {
 819                                         for (String ext : getImageExt(true)) {
 820                                                 if (Instance.getCache().check(new URL(line + ext))) {
 821                                                         url = new URL(line + ext);
 822                                                         break;
 823                                                 }
 824                                         }
 825
 826                                         // try out of cache
 827                                         if (url == null) {
 828                                                 for (String ext : getImageExt(true)) {
 829                                                         try {
 830                                                                 url = new URL(line + ext);
 831                                                                 Instance.getCache().refresh(url, support, true);
 832                                                                 break;
 833                                                         } catch (IOException e) {
 834                                                                 // no image with this ext
 835                                                                 url = null;
 836                                                         }
 837                                                 }
 838                                         }
 839                                 } catch (MalformedURLException e) {
 840                                         // Not an url
 841                                 }
 842                         }
 843
 844                         // refresh the cached file
 845                         if (url != null) {
 846                                 try {
 847                                         Instance.getCache().refresh(url, support, true);
 848                                 } catch (IOException e) {
 849                                         // woops, broken image
 850                                         url = null;
 851                                 }
 852                         }
 853                 }
 854
 855                 return url;
 856         }
 857
 858         /**
 859          * Open the input file that will be used through the support.
 860          *
 861          * @param source
 862          *            the source {@link URL}
 863          *
 864          * @return the {@link InputStream}
 865          *
 866          * @throws IOException
 867          *             in case of I/O error
 868          */
 869         protected InputStream openInput(URL source) throws IOException {
 870                 return Instance.getCache().open(source, this, false);
 871         }
 872
 873         /**
 874          * Reset the given {@link InputStream} and return it.
 875          *
 876          * @param in
 877          *            the {@link InputStream} to reset
 878          *
 879          * @return the same {@link InputStream} after reset
 880          */
 881         protected InputStream reset(InputStream in) {
 882                 try {
 883                         in.reset();
 884                 } catch (IOException e) {
 885                 }
 886                 return in;
 887         }
 888
 889         /**
 890          * Reset then return {@link BasicSupport#in}.
 891          *
 892          * @return {@link BasicSupport#in}
 893          */
 894         protected InputStream getInput() {
 895                 return reset(in);
 896         }
 897
 898         /**
 899          * Fix the author name if it is prefixed with some "by" {@link String}.
 900          *
 901          * @param author
 902          *            the author with a possible prefix
 903          *
 904          * @return the author without prefixes
 905          */
 906         protected String fixAuthor(String author) {
 907                 if (author != null) {
 908                         for (String suffix : new String[] { " ", ":" }) {
 909                                 for (String byString : Instance.getConfig()
 910                                                 .getString(Config.BYS).split(",")) {
 911                                         byString += suffix;
 912                                         if (author.toUpperCase().startsWith(byString.toUpperCase())) {
 913                                                 author = author.substring(byString.length()).trim();
 914                                         }
 915                                 }
 916                         }
 917
 918                         // Special case (without suffix):
 919                         if (author.startsWith("©")) {
 920                                 author = author.substring(1);
 921                         }
 922                 }
 923
 924                 return author;
 925         }
 926
 927         /**
 928          * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
 929          * and requotify them (i.e., separate them into QUOTE paragraphs and other
 930          * paragraphs (quotes or not)).
 931          *
 932          * @param para
 933          *            the paragraph to requotify (not necessarily a quote)
 934          *
 935          * @return the correctly (or so we hope) quotified paragraphs
 936          */
 937         protected List<Paragraph> requotify(Paragraph para) {
 938                 List<Paragraph> newParas = new ArrayList<Paragraph>();
 939
 940                 if (para.getType() == ParagraphType.QUOTE
 941                                 && para.getContent().length() > 2) {
 942                         String line = para.getContent();
 943                         boolean singleQ = line.startsWith("" + openQuote);
 944                         boolean doubleQ = line.startsWith("" + openDoubleQuote);
 945
 946                         // Do not try when more than one quote at a time
 947                         // (some stories are not easily readable if we do)
 948                         if (singleQ
 949                                         && line.indexOf(closeQuote, 1) < line
 950                                                         .lastIndexOf(closeQuote)) {
 951                                 newParas.add(para);
 952                                 return newParas;
 953                         }
 954                         if (doubleQ
 955                                         && line.indexOf(closeDoubleQuote, 1) < line
 956                                                         .lastIndexOf(closeDoubleQuote)) {
 957                                 newParas.add(para);
 958                                 return newParas;
 959                         }
 960                         //
 961
 962                         if (!singleQ && !doubleQ) {
 963                                 line = openDoubleQuote + line + closeDoubleQuote;
 964                                 newParas.add(new Paragraph(ParagraphType.QUOTE, line, para
 965                                                 .getWords()));
 966                         } else {
 967                                 char open = singleQ ? openQuote : openDoubleQuote;
 968                                 char close = singleQ ? closeQuote : closeDoubleQuote;
 969
 970                                 int posDot = -1;
 971                                 boolean inQuote = false;
 972                                 int i = 0;
 973                                 for (char car : line.toCharArray()) {
 974                                         if (car == open) {
 975                                                 inQuote = true;
 976                                         } else if (car == close) {
 977                                                 inQuote = false;
 978                                         } else if (car == '.' && !inQuote) {
 979                                                 posDot = i;
 980                                                 break;
 981                                         }
 982                                         i++;
 983                                 }
 984
 985                                 if (posDot >= 0) {
 986                                         String rest = line.substring(posDot + 1).trim();
 987                                         line = line.substring(0, posDot + 1).trim();
 988                                         long words = 1;
 989                                         for (char car : line.toCharArray()) {
 990                                                 if (car == ' ') {
 991                                                         words++;
 992                                                 }
 993                                         }
 994                                         newParas.add(new Paragraph(ParagraphType.QUOTE, line, words));
 995                                         if (!rest.isEmpty()) {
 996                                                 newParas.addAll(requotify(processPara(rest)));
 997                                         }
 998                                 } else {
 999                                         newParas.add(para);
1000                                 }
1001                         }
1002                 } else {
1003                         newParas.add(para);
1004                 }
1005
1006                 return newParas;
1007         }
1008
1009         /**
1010          * Process a {@link Paragraph} from a raw line of text.
1011          * <p>
1012          * Will also fix quotes and HTML encoding if needed.
1013          *
1014          * @param line
1015          *            the raw line
1016          *
1017          * @return the processed {@link Paragraph}
1018          */
1019         protected Paragraph processPara(String line) {
1020                 line = ifUnhtml(line).trim();
1021
1022                 boolean space = true;
1023                 boolean brk = true;
1024                 boolean quote = false;
1025                 boolean tentativeCloseQuote = false;
1026                 char prev = '\0';
1027                 int dashCount = 0;
1028                 long words = 1;
1029
1030                 StringBuilder builder = new StringBuilder();
1031                 for (char car : line.toCharArray()) {
1032                         if (car != '-') {
1033                                 if (dashCount > 0) {
1034                                         // dash, ndash and mdash: - – —
1035                                         // currently: always use mdash
1036                                         builder.append(dashCount == 1 ? '-' : '—');
1037                                 }
1038                                 dashCount = 0;
1039                         }
1040
1041                         if (tentativeCloseQuote) {
1042                                 tentativeCloseQuote = false;
1043                                 if (Character.isLetterOrDigit(car)) {
1044                                         builder.append("'");
1045                                 } else {
1046                                         // handle double-single quotes as double quotes
1047                                         if (prev == car) {
1048                                                 builder.append(closeDoubleQuote);
1049                                                 continue;
1050                                         } else {
1051                                                 builder.append(closeQuote);
1052                                         }
1053                                 }
1054                         }
1055
1056                         switch (car) {
1057                         case ' ': // note: unbreakable space
1058                         case ' ':
1059                         case '\t':
1060                         case '\n': // just in case
1061                         case '\r': // just in case
1062                                 if (builder.length() > 0
1063                                                 && builder.charAt(builder.length() - 1) != ' ') {
1064                                         words++;
1065                                 }
1066                                 builder.append(' ');
1067                                 break;
1068
1069                         case '\'':
1070                                 if (space || (brk && quote)) {
1071                                         quote = true;
1072                                         // handle double-single quotes as double quotes
1073                                         if (prev == car) {
1074                                                 builder.deleteCharAt(builder.length() - 1);
1075                                                 builder.append(openDoubleQuote);
1076                                         } else {
1077                                                 builder.append(openQuote);
1078                                         }
1079                                 } else if (prev == ' ' || prev == car) {
1080                                         // handle double-single quotes as double quotes
1081                                         if (prev == car) {
1082                                                 builder.deleteCharAt(builder.length() - 1);
1083                                                 builder.append(openDoubleQuote);
1084                                         } else {
1085                                                 builder.append(openQuote);
1086                                         }
1087                                 } else {
1088                                         // it is a quote ("I'm off") or a 'quote' ("This
1089                                         // 'good' restaurant"...)
1090                                         tentativeCloseQuote = true;
1091                                 }
1092                                 break;
1093
1094                         case '"':
1095                                 if (space || (brk && quote)) {
1096                                         quote = true;
1097                                         builder.append(openDoubleQuote);
1098                                 } else if (prev == ' ') {
1099                                         builder.append(openDoubleQuote);
1100                                 } else {
1101                                         builder.append(closeDoubleQuote);
1102                                 }
1103                                 break;
1104
1105                         case '-':
1106                                 if (space) {
1107                                         quote = true;
1108                                 } else {
1109                                         dashCount++;
1110                                 }
1111                                 space = false;
1112                                 break;
1113
1114                         case '*':
1115                         case '~':
1116                         case '/':
1117                         case '\\':
1118                         case '<':
1119                         case '>':
1120                         case '=':
1121                         case '+':
1122                         case '_':
1123                         case '–':
1124                         case '—':
1125                                 space = false;
1126                                 builder.append(car);
1127                                 break;
1128
1129                         case '‘':
1130                         case '`':
1131                         case '‹':
1132                         case '﹁':
1133                         case '〈':
1134                         case '「':
1135                                 if (space || (brk && quote)) {
1136                                         quote = true;
1137                                         builder.append(openQuote);
1138                                 } else {
1139                                         // handle double-single quotes as double quotes
1140                                         if (prev == car) {
1141                                                 builder.deleteCharAt(builder.length() - 1);
1142                                                 builder.append(openDoubleQuote);
1143                                         } else {
1144                                                 builder.append(openQuote);
1145                                         }
1146                                 }
1147                                 space = false;
1148                                 brk = false;
1149                                 break;
1150
1151                         case '’':
1152                         case '›':
1153                         case '﹂':
1154                         case '〉':
1155                         case '」':
1156                                 space = false;
1157                                 brk = false;
1158                                 // handle double-single quotes as double quotes
1159                                 if (prev == car) {
1160                                         builder.deleteCharAt(builder.length() - 1);
1161                                         builder.append(closeDoubleQuote);
1162                                 } else {
1163                                         builder.append(closeQuote);
1164                                 }
1165                                 break;
1166
1167                         case '«':
1168                         case '“':
1169                         case '﹃':
1170                         case '《':
1171                         case '『':
1172                                 if (space || (brk && quote)) {
1173                                         quote = true;
1174                                         builder.append(openDoubleQuote);
1175                                 } else {
1176                                         builder.append(openDoubleQuote);
1177                                 }
1178                                 space = false;
1179                                 brk = false;
1180                                 break;
1181
1182                         case '»':
1183                         case '”':
1184                         case '﹄':
1185                         case '》':
1186                         case '』':
1187                                 space = false;
1188                                 brk = false;
1189                                 builder.append(closeDoubleQuote);
1190                                 break;
1191
1192                         default:
1193                                 space = false;
1194                                 brk = false;
1195                                 builder.append(car);
1196                                 break;
1197                         }
1198
1199                         prev = car;
1200                 }
1201
1202                 if (tentativeCloseQuote) {
1203                         tentativeCloseQuote = false;
1204                         builder.append(closeQuote);
1205                 }
1206
1207                 line = builder.toString().trim();
1208
1209                 ParagraphType type = ParagraphType.NORMAL;
1210                 if (space) {
1211                         type = ParagraphType.BLANK;
1212                 } else if (brk) {
1213                         type = ParagraphType.BREAK;
1214                 } else if (quote) {
1215                         type = ParagraphType.QUOTE;
1216                 }
1217
1218                 return new Paragraph(type, line, words);
1219         }
1220
1221         /**
1222          * Remove the HTML from the input <b>if</b> {@link BasicSupport#isHtml()} is
1223          * true.
1224          *
1225          * @param input
1226          *            the input
1227          *
1228          * @return the no html version if needed
1229          */
1230         private String ifUnhtml(String input) {
1231                 if (isHtml() && input != null) {
1232                         return StringUtils.unhtml(input);
1233                 }
1234
1235                 return input;
1236         }
1237
1238         /**
1239          * Return a {@link BasicSupport} implementation supporting the given
1240          * resource if possible.
1241          *
1242          * @param url
1243          *            the story resource
1244          *
1245          * @return an implementation that supports it, or NULL
1246          */
1247         public static BasicSupport getSupport(URL url) {
1248                 if (url == null) {
1249                         return null;
1250                 }
1251
1252                 // TEXT and INFO_TEXT always support files (not URLs though)
1253                 for (SupportType type : SupportType.values()) {
1254                         if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1255                                 BasicSupport support = getSupport(type);
1256                                 if (support != null && support.supports(url)) {
1257                                         return support;
1258                                 }
1259                         }
1260                 }
1261
1262                 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
1263                                 SupportType.TEXT }) {
1264                         BasicSupport support = getSupport(type);
1265                         if (support != null && support.supports(url)) {
1266                                 return support;
1267                         }
1268                 }
1269
1270                 return null;
1271         }
1272
1273         /**
1274          * Return a {@link BasicSupport} implementation supporting the given type.
1275          *
1276          * @param type
1277          *            the type
1278          *
1279          * @return an implementation that supports it, or NULL
1280          */
1281         public static BasicSupport getSupport(SupportType type) {
1282                 switch (type) {
1283                 case EPUB:
1284                         return new Epub().setType(type);
1285                 case INFO_TEXT:
1286                         return new InfoText().setType(type);
1287                 case FIMFICTION:
1288                         return new Fimfiction().setType(type);
1289                 case FANFICTION:
1290                         return new Fanfiction().setType(type);
1291                 case TEXT:
1292                         return new Text().setType(type);
1293                 case MANGAFOX:
1294                         return new MangaFox().setType(type);
1295                 case E621:
1296                         return new E621().setType(type);
1297                 case YIFFSTAR:
1298                         return new YiffStar().setType(type);
1299                 case CBZ:
1300                         return new Cbz().setType(type);
1301                 case HTML:
1302                         return new Html().setType(type);
1303                 }
1304
1305                 return null;
1306         }
1307
1308         /**
1309          * Return the first line from the given input which correspond to the given
1310          * selectors.
1311          *
1312          * @param in
1313          *            the input
1314          * @param needle
1315          *            a string that must be found inside the target line (also
1316          *            supports "^" at start to say "only if it starts with" the
1317          *            needle)
1318          * @param relativeLine
1319          *            the line to return based upon the target line position (-1 =
1320          *            the line before, 0 = the target line...)
1321          *
1322          * @return the line
1323          */
1324         static String getLine(InputStream in, String needle, int relativeLine) {
1325                 return getLine(in, needle, relativeLine, true);
1326         }
1327
1328         /**
1329          * Return a line from the given input which correspond to the given
1330          * selectors.
1331          *
1332          * @param in
1333          *            the input
1334          * @param needle
1335          *            a string that must be found inside the target line (also
1336          *            supports "^" at start to say "only if it starts with" the
1337          *            needle)
1338          * @param relativeLine
1339          *            the line to return based upon the target line position (-1 =
1340          *            the line before, 0 = the target line...)
1341          * @param first
1342          *            takes the first result (as opposed to the last one, which will
1343          *            also always spend the input)
1344          *
1345          * @return the line
1346          */
1347         static String getLine(InputStream in, String needle, int relativeLine,
1348                         boolean first) {
1349                 String rep = null;
1350
1351                 try {
1352                         in.reset();
1353                 } catch (IOException e) {
1354                         Instance.syserr(e);
1355                 }
1356
1357                 List<String> lines = new ArrayList<String>();
1358                 @SuppressWarnings("resource")
1359                 Scanner scan = new Scanner(in, "UTF-8");
1360                 int index = -1;
1361                 scan.useDelimiter("\\n");
1362                 while (scan.hasNext()) {
1363                         lines.add(scan.next());
1364
1365                         if (index == -1) {
1366                                 if (needle.startsWith("^")) {
1367                                         if (lines.get(lines.size() - 1).startsWith(
1368                                                         needle.substring(1))) {
1369                                                 index = lines.size() - 1;
1370                                         }
1371
1372                                 } else {
1373                                         if (lines.get(lines.size() - 1).contains(needle)) {
1374                                                 index = lines.size() - 1;
1375                                         }
1376                                 }
1377                         }
1378
1379                         if (index >= 0 && index + relativeLine < lines.size()) {
1380                                 rep = lines.get(index + relativeLine);
1381                                 if (first) {
1382                                         break;
1383                                 }
1384                         }
1385                 }
1386
1387                 return rep;
1388         }
1389 }