src/be/nikiroo/fanfix/supported/BasicSupport.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.awt.image.BufferedImage;
   4 import java.io.BufferedReader;
   5 import java.io.ByteArrayInputStream;
   6 import java.io.File;
   7 import java.io.IOException;
   8 import java.io.InputStream;
   9 import java.io.InputStreamReader;
  10 import java.net.MalformedURLException;
  11 import java.net.URL;
  12 import java.util.ArrayList;
  13 import java.util.Date;
  14 import java.util.HashMap;
  15 import java.util.List;
  16 import java.util.Map;
  17 import java.util.Map.Entry;
  18 import java.util.Scanner;
  19
  20 import be.nikiroo.fanfix.Instance;
  21 import be.nikiroo.fanfix.bundles.Config;
  22 import be.nikiroo.fanfix.bundles.StringId;
  23 import be.nikiroo.fanfix.data.Chapter;
  24 import be.nikiroo.fanfix.data.MetaData;
  25 import be.nikiroo.fanfix.data.Paragraph;
  26 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
  27 import be.nikiroo.fanfix.data.Story;
  28 import be.nikiroo.utils.IOUtils;
  29 import be.nikiroo.utils.Progress;
  30 import be.nikiroo.utils.StringUtils;
  31
  32 /**
 * This class is the base class used by the other support classes. It can be
 * used outside of this package, and has static methods that you can use to
 * get access to the correct support class.
  36  * <p>
  37  * It will be used with 'resources' (usually web pages or files).
  38  *
  39  * @author niki
  40  */
  41 public abstract class BasicSupport {
  42         /**
  43          * The supported input types for which we can get a {@link BasicSupport}
  44          * object.
  45          *
  46          * @author niki
  47          */
  48         public enum SupportType {
  49                 /** EPUB files created with this program */
  50                 EPUB,
  51                 /** Pure text file with some rules */
  52                 TEXT,
  53                 /** TEXT but with associated .info file */
  54                 INFO_TEXT,
  55                 /** My Little Pony fanfictions */
  56                 FIMFICTION,
  57                 /** Fanfictions from a lot of different universes */
  58                 FANFICTION,
  59                 /** Website with lots of Mangas */
  60                 MANGAFOX,
  61                 /** Furry website with comics support */
  62                 E621,
  63                 /** Furry website with stories */
  64                 YIFFSTAR,
  65                 /** CBZ files */
  66                 CBZ,
  67                 /** HTML files */
  68                 HTML;
  69
  70                 /**
  71                  * A description of this support type (more information than the
  72                  * {@link BasicSupport#getSourceName()}).
  73                  *
  74                  * @return the description
  75                  */
  76                 public String getDesc() {
  77                         String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
  78                                         this.name());
  79
  80                         if (desc == null) {
  81                                 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
  82                         }
  83
  84                         return desc;
  85                 }
  86
  87                 /**
  88                  * The name of this support type (a short version).
  89                  *
  90                  * @return the name
  91                  */
  92                 public String getSourceName() {
  93                         BasicSupport support = BasicSupport.getSupport(this);
  94                         if (support != null) {
  95                                 return support.getSourceName();
  96                         }
  97
  98                         return null;
  99                 }
 100
 101                 @Override
 102                 public String toString() {
 103                         return super.toString().toLowerCase();
 104                 }
 105
 106                 /**
 107                  * Call {@link SupportType#valueOf(String.toUpperCase())}.
 108                  *
 109                  * @param typeName
 110                  *            the possible type name
 111                  *
 112                  * @return NULL or the type
 113                  */
 114                 public static SupportType valueOfUC(String typeName) {
 115                         return SupportType.valueOf(typeName == null ? null : typeName
 116                                         .toUpperCase());
 117                 }
 118
 119                 /**
 120                  * Call {@link SupportType#valueOf(String.toUpperCase())} but return
 121                  * NULL for NULL instead of raising exception.
 122                  *
 123                  * @param typeName
 124                  *            the possible type name
 125                  *
 126                  * @return NULL or the type
 127                  */
 128                 public static SupportType valueOfNullOkUC(String typeName) {
 129                         if (typeName == null) {
 130                                 return null;
 131                         }
 132
 133                         return SupportType.valueOfUC(typeName);
 134                 }
 135
 136                 /**
 137                  * Call {@link SupportType#valueOf(String.toUpperCase())} but return
 138                  * NULL in case of error instead of raising an exception.
 139                  *
 140                  * @param typeName
 141                  *            the possible type name
 142                  *
 143                  * @return NULL or the type
 144                  */
 145                 public static SupportType valueOfAllOkUC(String typeName) {
 146                         try {
 147                                 return SupportType.valueOfUC(typeName);
 148                         } catch (Exception e) {
 149                                 return null;
 150                         }
 151                 }
 152         }
 153
        // The main resource currently processed (assigned in processMeta from
        // openInput(url))
        private InputStream in;
        // The support type, as set via setType
        private SupportType type;
        private URL currentReferer; // with only one 'r', as in 'HTTP'...

        // quote chars (read from the translation bundle when the instance is
        // created)
        private char openQuote = Instance.getTrans().getCharacter(
                        StringId.OPEN_SINGLE_QUOTE);
        private char closeQuote = Instance.getTrans().getCharacter(
                        StringId.CLOSE_SINGLE_QUOTE);
        private char openDoubleQuote = Instance.getTrans().getCharacter(
                        StringId.OPEN_DOUBLE_QUOTE);
        private char closeDoubleQuote = Instance.getTrans().getCharacter(
                        StringId.CLOSE_DOUBLE_QUOTE);
 167
        /**
         * The name of this support class (this is the short name returned by
         * {@link SupportType#getSourceName()} for the matching type).
         *
         * @return the name
         */
        protected abstract String getSourceName();
 174
        /**
         * Check if the given resource is supported by this {@link BasicSupport}.
         *
         * @param url
         *            the resource to check for
         *
         * @return TRUE if this support can handle the resource
         */
        protected abstract boolean supports(URL url);
 184
        /**
         * Return TRUE if the support will return HTML encoded content values for
         * the chapters content.
         * <p>
         * This choice drives how the content is split into paragraphs (on HTML
         * tags or on line breaks).
         *
         * @return TRUE for HTML
         */
        protected abstract boolean isHtml();
 192
        /**
         * Return the metadata of the story.
         * <p>
         * The caller in this class completes the creation date and the cover of
         * the returned object when they are missing.
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input (the main resource)
         *
         * @return the metadata
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract MetaData getMeta(URL source, InputStream in)
                        throws IOException;
 195
        /**
         * Return the story description.
         * <p>
         * The result is converted into the chapter number 0 of the story and
         * stored as its resume.
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input (the main resource)
         *
         * @return the description
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract String getDesc(URL source, InputStream in)
                        throws IOException;
 211
        /**
         * Return the list of chapters (name and resource).
         * <p>
         * Each entry maps the chapter name (key) to the {@link URL} of its
         * content (value).
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input (the main resource)
         * @param pg
         *            the optional progress reporter
         *
         * @return the chapters (a NULL value is accepted by
         *         {@link BasicSupport#process(URL, Progress)} and means "no
         *         chapters")
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract List<Entry<String, URL>> getChapters(URL source,
                        InputStream in, Progress pg) throws IOException;
 229
        /**
         * Return the content of the chapter (possibly HTML encoded, if
         * {@link BasicSupport#isHtml()} is TRUE).
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input for this chapter
         *            ({@link BasicSupport#process(URL, Progress)} passes the
         *            stream it opened on the chapter {@link URL}, not the main
         *            resource)
         * @param number
         *            the chapter number
         *            ({@link BasicSupport#process(URL, Progress)} starts counting
         *            at 1)
         * @param pg
         *            the optional progress reporter
         *
         * @return the content
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract String getChapterContent(URL source, InputStream in,
                        int number, Progress pg) throws IOException;
 250
        /**
         * Log into the support (can be a no-op depending upon the support).
         * <p>
         * This default implementation does nothing; it is called before the main
         * resource is opened.
         *
         * @throws IOException
         *             in case of I/O error
         */
        public void login() throws IOException {

        }
 260
 261         /**
 262          * Return the list of cookies (values included) that must be used to
 263          * correctly fetch the resources.
 264          * <p>
 265          * You are expected to call the super method implementation if you override
 266          * it.
 267          *
 268          * @return the cookies
 269          *
 270          * @throws IOException
 271          *             in case of I/O error
 272          */
 273         public Map<String, String> getCookies() throws IOException {
 274                 return new HashMap<String, String>();
 275         }
 276
        /**
         * Return the canonical form of the main {@link URL}.
         * <p>
         * This default implementation returns the given {@link URL} unchanged.
         *
         * @param source
         *            the source {@link URL}
         *
         * @return the canonical form of this {@link URL}
         *
         * @throws IOException
         *             in case of I/O error
         */
        public URL getCanonicalUrl(URL source) throws IOException {
                return source;
        }
 291
 292         /**
 293          * Process the given story resource into a partially filled {@link Story}
 294          * object containing the name and metadata, except for the description.
 295          *
 296          * @param url
 297          *            the story resource
 298          *
 299          * @return the {@link Story}
 300          *
 301          * @throws IOException
 302          *             in case of I/O error
 303          */
 304         public Story processMeta(URL url) throws IOException {
 305                 return processMeta(url, true, false, null);
 306         }
 307
 308         /**
 309          * Process the given story resource into a partially filled {@link Story}
 310          * object containing the name and metadata.
 311          *
 312          * @param url
 313          *            the story resource
 314          *
 315          * @param close
 316          *            close "this" and "in" when done
 317          * @param pg
 318          *            the optional progress reporter
 319          *
 320          * @return the {@link Story}
 321          *
 322          * @throws IOException
 323          *             in case of I/O error
 324          */
 325         protected Story processMeta(URL url, boolean close, boolean getDesc,
 326                         Progress pg) throws IOException {
 327                 if (pg == null) {
 328                         pg = new Progress();
 329                 } else {
 330                         pg.setMinMax(0, 100);
 331                 }
 332
 333                 login();
 334                 pg.setProgress(10);
 335
 336                 url = getCanonicalUrl(url);
 337
 338                 setCurrentReferer(url);
 339
 340                 in = openInput(url);
 341                 if (in == null) {
 342                         return null;
 343                 }
 344
 345                 try {
 346                         preprocess(url, getInput());
 347                         pg.setProgress(30);
 348
 349                         Story story = new Story();
 350                         MetaData meta = getMeta(url, getInput());
 351                         if (meta.getCreationDate() == null
 352                                         || meta.getCreationDate().isEmpty()) {
 353                                 meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
 354                         }
 355                         story.setMeta(meta);
 356
 357                         pg.setProgress(50);
 358
 359                         if (meta != null && meta.getCover() == null) {
 360                                 meta.setCover(getDefaultCover(meta.getSubject()));
 361                         }
 362
 363                         pg.setProgress(60);
 364
 365                         if (getDesc) {
 366                                 String descChapterName = Instance.getTrans().getString(
 367                                                 StringId.DESCRIPTION);
 368                                 story.getMeta().setResume(
 369                                                 makeChapter(url, 0, descChapterName,
 370                                                                 getDesc(url, getInput()), null));
 371                         }
 372
 373                         pg.setProgress(100);
 374                         return story;
 375                 } finally {
 376                         if (close) {
 377                                 try {
 378                                         close();
 379                                 } catch (IOException e) {
 380                                         Instance.syserr(e);
 381                                 }
 382
 383                                 if (in != null) {
 384                                         in.close();
 385                                 }
 386                         }
 387
 388                         setCurrentReferer(null);
 389                 }
 390         }
 391
 392         /**
 393          * Process the given story resource into a fully filled {@link Story}
 394          * object.
 395          *
 396          * @param url
 397          *            the story resource
 398          * @param pg
 399          *            the optional progress reporter
 400          *
 401          * @return the {@link Story}
 402          *
 403          * @throws IOException
 404          *             in case of I/O error
 405          */
 406         public Story process(URL url, Progress pg) throws IOException {
 407                 if (pg == null) {
 408                         pg = new Progress();
 409                 } else {
 410                         pg.setMinMax(0, 100);
 411                 }
 412
 413                 url = getCanonicalUrl(url);
 414                 pg.setProgress(1);
 415                 try {
 416                         Progress pgMeta = new Progress();
 417                         pg.addProgress(pgMeta, 10);
 418                         Story story = processMeta(url, false, true, pgMeta);
 419                         if (!pgMeta.isDone()) {
 420                                 pgMeta.setProgress(pgMeta.getMax()); // 10%
 421                         }
 422
 423                         if (story == null) {
 424                                 pg.setProgress(90);
 425                                 return null;
 426                         }
 427
 428                         pg.setName("Retrieving " + story.getMeta().getTitle());
 429
 430                         setCurrentReferer(url);
 431
 432                         Progress pgGetChapters = new Progress();
 433                         pg.addProgress(pgGetChapters, 10);
 434                         story.setChapters(new ArrayList<Chapter>());
 435                         List<Entry<String, URL>> chapters = getChapters(url, getInput(),
 436                                         pgGetChapters);
 437                         if (!pgGetChapters.isDone()) {
 438                                 pgGetChapters.setProgress(pgGetChapters.getMax()); // 20%
 439                         }
 440
 441                         if (chapters != null) {
 442                                 Progress pgChaps = new Progress("Extracting chapters", 0,
 443                                                 chapters.size() * 300);
 444                                 pg.addProgress(pgChaps, 80);
 445
 446                                 long words = 0;
 447                                 int i = 1;
 448                                 for (Entry<String, URL> chap : chapters) {
 449                                         pgChaps.setName("Extracting chapter " + i);
 450                                         setCurrentReferer(chap.getValue());
 451                                         InputStream chapIn = Instance.getCache().open(
 452                                                         chap.getValue(), this, true);
 453                                         pgChaps.setProgress(i * 100);
 454                                         try {
 455                                                 Progress pgGetChapterContent = new Progress();
 456                                                 Progress pgMakeChapter = new Progress();
 457                                                 pgChaps.addProgress(pgGetChapterContent, 100);
 458                                                 pgChaps.addProgress(pgMakeChapter, 100);
 459
 460                                                 String content = getChapterContent(url, chapIn, i,
 461                                                                 pgGetChapterContent);
 462                                                 if (!pgGetChapterContent.isDone()) {
 463                                                         pgGetChapterContent.setProgress(pgGetChapterContent
 464                                                                         .getMax());
 465                                                 }
 466
 467                                                 Chapter cc = makeChapter(url, i, chap.getKey(),
 468                                                                 content, pgMakeChapter);
 469                                                 if (!pgMakeChapter.isDone()) {
 470                                                         pgMakeChapter.setProgress(pgMakeChapter.getMax());
 471                                                 }
 472
 473                                                 words += cc.getWords();
 474                                                 story.getChapters().add(cc);
 475                                                 if (story.getMeta() != null) {
 476                                                         story.getMeta().setWords(words);
 477                                                 }
 478                                         } finally {
 479                                                 chapIn.close();
 480                                         }
 481
 482                                         i++;
 483                                 }
 484
 485                                 pgChaps.setName("Extracting chapters");
 486                         } else {
 487                                 pg.setProgress(80);
 488                         }
 489
 490                         return story;
 491
 492                 } finally {
 493                         try {
 494                                 close();
 495                         } catch (IOException e) {
 496                                 Instance.syserr(e);
 497                         }
 498
 499                         if (in != null) {
 500                                 in.close();
 501                         }
 502
 503                         setCurrentReferer(null);
 504                 }
 505         }
 506
        /**
         * The support type (as set via {@link BasicSupport#setType(SupportType)}).
         *
         * @return the type
         */
        public SupportType getType() {
                return type;
        }
 515
        /**
         * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
         * the current {@link URL} we work on.
         *
         * @return the referer (NULL when no resource is being processed)
         */
        public URL getCurrentReferer() {
                return currentReferer;
        }
 525
        /**
         * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
         * the current {@link URL} we work on.
         *
         * @param currentReferer
         *            the new referer (NULL to reset it)
         */
        protected void setCurrentReferer(URL currentReferer) {
                this.currentReferer = currentReferer;
        }
 536
        /**
         * Set the support type.
         *
         * @param type
         *            the new type
         *
         * @return this (so the call can be chained)
         */
        protected BasicSupport setType(SupportType type) {
                this.type = type;
                return this;
        }
 549
        /**
         * Prepare the support if needed before processing.
         * <p>
         * This default implementation does nothing; it is called once the main
         * resource has been opened and before the metadata is retrieved.
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input (the main resource)
         *
         * @throws IOException
         *             on I/O error
         */
        protected void preprocess(URL source, InputStream in) throws IOException {
        }
 563
        /**
         * Now that we have processed the {@link Story}, close the resources if any.
         * <p>
         * This default implementation does nothing. The callers in this class
         * report an {@link IOException} thrown here via Instance.syserr instead
         * of propagating it.
         *
         * @throws IOException
         *             on I/O error
         */
        protected void close() throws IOException {
        }
 572
 573         /**
 574          * Create a {@link Chapter} object from the given information, formatting
 575          * the content as it should be.
 576          *
 577          * @param number
 578          *            the chapter number
 579          * @param name
 580          *            the chapter name
 581          * @param content
 582          *            the chapter content
 583          * @param pg
 584          *            the optional progress reporter
 585          *
 586          * @return the {@link Chapter}
 587          *
 588          * @throws IOException
 589          *             in case of I/O error
 590          */
 591         protected Chapter makeChapter(URL source, int number, String name,
 592                         String content, Progress pg) throws IOException {
 593                 // Chapter name: process it correctly, then remove the possible
 594                 // redundant "Chapter x: " in front of it
 595                 String chapterName = processPara(name).getContent().trim();
 596                 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
 597                                 .split(",")) {
 598                         String chapterWord = Instance.getConfig().getStringX(
 599                                         Config.CHAPTER, lang);
 600                         if (chapterName.startsWith(chapterWord)) {
 601                                 chapterName = chapterName.substring(chapterWord.length())
 602                                                 .trim();
 603                                 break;
 604                         }
 605                 }
 606
 607                 if (chapterName.startsWith(Integer.toString(number))) {
 608                         chapterName = chapterName.substring(
 609                                         Integer.toString(number).length()).trim();
 610                 }
 611
 612                 if (chapterName.startsWith(":")) {
 613                         chapterName = chapterName.substring(1).trim();
 614                 }
 615                 //
 616
 617                 Chapter chap = new Chapter(number, chapterName);
 618
 619                 if (content != null) {
 620                         List<Paragraph> paras = makeParagraphs(source, content, pg);
 621                         long words = 0;
 622                         for (Paragraph para : paras) {
 623                                 words += para.getWords();
 624                         }
 625                         chap.setParagraphs(paras);
 626                         chap.setWords(words);
 627                 }
 628
 629                 return chap;
 630
 631         }
 632
 633         /**
 634          * Convert the given content into {@link Paragraph}s.
 635          *
 636          * @param source
 637          *            the source URL of the story
 638          * @param content
 639          *            the textual content
 640          * @param pg
 641          *            the optional progress reporter
 642          *
 643          * @return the {@link Paragraph}s
 644          *
 645          * @throws IOException
 646          *             in case of I/O error
 647          */
 648         protected List<Paragraph> makeParagraphs(URL source, String content,
 649                         Progress pg) throws IOException {
 650                 if (pg == null) {
 651                         pg = new Progress();
 652                 }
 653
 654                 if (isHtml()) {
 655                         // Special <HR> processing:
 656                         content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
 657                                         "<br/>* * *<br/>");
 658                 }
 659
 660                 List<Paragraph> paras = new ArrayList<Paragraph>();
 661
 662                 if (content != null && !content.trim().isEmpty()) {
 663                         if (isHtml()) {
 664                                 String[] tab = content.split("(<p>|</p>|<br>|<br/>)");
 665                                 pg.setMinMax(0, tab.length);
 666                                 int i = 1;
 667                                 for (String line : tab) {
 668                                         if (line.startsWith("[") && line.endsWith("]")) {
 669                                                 pg.setName("Extracting image " + i);
 670                                         }
 671                                         paras.add(makeParagraph(source, line.trim()));
 672                                         pg.setProgress(i++);
 673                                 }
 674                                 pg.setName(null);
 675                         } else {
 676                                 List<String> lines = new ArrayList<String>();
 677                                 BufferedReader buff = null;
 678                                 try {
 679                                         buff = new BufferedReader(
 680                                                         new InputStreamReader(new ByteArrayInputStream(
 681                                                                         content.getBytes("UTF-8")), "UTF-8"));
 682                                         for (String line = buff.readLine(); line != null; line = buff
 683                                                         .readLine()) {
 684                                                 lines.add(line.trim());
 685                                         }
 686                                 } finally {
 687                                         if (buff != null) {
 688                                                 buff.close();
 689                                         }
 690                                 }
 691
 692                                 pg.setMinMax(0, lines.size());
 693                                 int i = 0;
 694                                 for (String line : lines) {
 695                                         if (line.startsWith("[") && line.endsWith("]")) {
 696                                                 pg.setName("Extracting image " + i);
 697                                         }
 698                                         paras.add(makeParagraph(source, line));
 699                                         pg.setProgress(i++);
 700                                 }
 701                                 pg.setName(null);
 702                         }
 703
 704                         // Check quotes for "bad" format
 705                         List<Paragraph> newParas = new ArrayList<Paragraph>();
 706                         for (Paragraph para : paras) {
 707                                 newParas.addAll(requotify(para));
 708                         }
 709                         paras = newParas;
 710
 711                         // Remove double blanks/brks
 712                         fixBlanksBreaks(paras);
 713                 }
 714
 715                 return paras;
 716         }
 717
 718         /**
 719          * Convert the given line into a single {@link Paragraph}.
 720          *
 721          * @param source
 722          *            the source URL of the story
 723          * @param line
 724          *            the textual content of the paragraph
 725          *
 726          * @return the {@link Paragraph}
 727          */
 728         private Paragraph makeParagraph(URL source, String line) {
 729                 URL image = null;
 730                 if (line.startsWith("[") && line.endsWith("]")) {
 731                         image = getImageUrl(this, source,
 732                                         line.substring(1, line.length() - 1).trim());
 733                 }
 734
 735                 if (image != null) {
 736                         return new Paragraph(image);
 737                 } else {
 738                         return processPara(line);
 739                 }
 740         }
 741
        /**
         * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
         * those {@link Paragraph}s.
         * <p>
         * The resulting list will not contain a starting or trailing blank/break
         * nor 2 blanks or breaks following each other.
         * <p>
         * The given list is modified in place.
         *
         * @param paras
         *            the list of {@link Paragraph}s to fix
         */
        protected void fixBlanksBreaks(List<Paragraph> paras) {
                // State of the previously examined paragraph; brk starts at TRUE so
                // that a blank/break at the very start of the list is removed
                boolean space = false;
                boolean brk = true;
                for (int i = 0; i < paras.size(); i++) {
                        Paragraph para = paras.get(i);
                        boolean thisSpace = para.getType() == ParagraphType.BLANK;
                        boolean thisBrk = para.getType() == ParagraphType.BREAK;

                        if (i > 0 && space && thisBrk) {
                                // A break after a blank: remove the entry before this one
                                // and keep the break
                                paras.remove(i - 1);
                                i--;
                        } else if ((space || brk) && (thisSpace || thisBrk)) {
                                // Any other blank/break following a blank/break: remove
                                // this one
                                paras.remove(i);
                                i--;
                        }

                        // Note: the state is updated even when this paragraph was
                        // removed
                        space = thisSpace;
                        brk = thisBrk;
                }

                // Remove blank/brk at start
                if (paras.size() > 0
                                && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
                                                0).getType() == ParagraphType.BREAK)) {
                        paras.remove(0);
                }

                // Remove blank/brk at end
                int last = paras.size() - 1;
                if (paras.size() > 0
                                && (paras.get(last).getType() == ParagraphType.BLANK || paras
                                                .get(last).getType() == ParagraphType.BREAK)) {
                        paras.remove(last);
                }
        }
 787
 788         /**
 789          * Get the default cover related to this subject (see <tt>.info</tt> files).
 790          *
 791          * @param subject
 792          *            the subject
 793          *
 794          * @return the cover if any, or NULL
 795          */
 796         static BufferedImage getDefaultCover(String subject) {
 797                 if (subject != null && !subject.isEmpty()
 798                                 && Instance.getCoverDir() != null) {
 799                         try {
 800                                 File fileCover = new File(Instance.getCoverDir(), subject);
 801                                 return getImage(null, fileCover.toURI().toURL(), subject);
 802                         } catch (MalformedURLException e) {
 803                         }
 804                 }
 805
 806                 return null;
 807         }
 808
 809         /**
 810          * Return the list of supported image extensions.
 811          *
 812          * @param emptyAllowed
 813          *            TRUE to allow an empty extension on first place, which can be
 814          *            used when you may already have an extension in your input but
 815          *            are not sure about it
 816          *
 817          * @return the extensions
 818          */
 819         static String[] getImageExt(boolean emptyAllowed) {
 820                 if (emptyAllowed) {
 821                         return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 822                 } else {
 823                         return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 824                 }
 825         }
 826
 827         /**
 828          * Check if the given resource can be a local image or a remote image, then
 829          * refresh the cache with it if it is.
 830          *
 831          * @param source
 832          *            the story source
 833          * @param line
 834          *            the resource to check
 835          *
 836          * @return the image if found, or NULL
 837          *
 838          */
 839         static BufferedImage getImage(BasicSupport support, URL source, String line) {
 840                 URL url = getImageUrl(support, source, line);
 841                 if (url != null) {
 842                         InputStream in = null;
 843                         try {
 844                                 in = Instance.getCache().open(url, getSupport(url), true);
 845                                 return IOUtils.toImage(in);
 846                         } catch (IOException e) {
 847                         } finally {
 848                                 if (in != null) {
 849                                         try {
 850                                                 in.close();
 851                                         } catch (IOException e) {
 852                                         }
 853                                 }
 854                         }
 855                 }
 856
 857                 return null;
 858         }
 859
 860         /**
 861          * Check if the given resource can be a local image or a remote image, then
 862          * refresh the cache with it if it is.
 863          *
 864          * @param source
 865          *            the story source
 866          * @param line
 867          *            the resource to check
 868          *
 869          * @return the image URL if found, or NULL
 870          *
 871          */
 872         static URL getImageUrl(BasicSupport support, URL source, String line) {
 873                 URL url = null;
 874
 875                 if (line != null) {
 876                         // try for files
 877                         if (source != null) {
 878                                 try {
 879
 880                                         String relPath = null;
 881                                         String absPath = null;
 882                                         try {
 883                                                 String path = new File(source.getFile()).getParent();
 884                                                 relPath = new File(new File(path), line.trim())
 885                                                                 .getAbsolutePath();
 886                                         } catch (Exception e) {
 887                                                 // Cannot be converted to path (one possibility to take
 888                                                 // into account: absolute path on Windows)
 889                                         }
 890                                         try {
 891                                                 absPath = new File(line.trim()).getAbsolutePath();
 892                                         } catch (Exception e) {
 893                                                 // Cannot be converted to path (at all)
 894                                         }
 895
 896                                         for (String ext : getImageExt(true)) {
 897                                                 if (absPath != null && new File(absPath + ext).exists()) {
 898                                                         url = new File(absPath + ext).toURI().toURL();
 899                                                 } else if (relPath != null
 900                                                                 && new File(relPath + ext).exists()) {
 901                                                         url = new File(relPath + ext).toURI().toURL();
 902                                                 }
 903                                         }
 904                                 } catch (Exception e) {
 905                                         // Should not happen since we control the correct arguments
 906                                 }
 907                         }
 908
 909                         if (url == null) {
 910                                 // try for URLs
 911                                 try {
 912                                         for (String ext : getImageExt(true)) {
 913                                                 if (Instance.getCache().check(new URL(line + ext))) {
 914                                                         url = new URL(line + ext);
 915                                                         break;
 916                                                 }
 917                                         }
 918
 919                                         // try out of cache
 920                                         if (url == null) {
 921                                                 for (String ext : getImageExt(true)) {
 922                                                         try {
 923                                                                 url = new URL(line + ext);
 924                                                                 Instance.getCache().refresh(url, support, true);
 925                                                                 break;
 926                                                         } catch (IOException e) {
 927                                                                 // no image with this ext
 928                                                                 url = null;
 929                                                         }
 930                                                 }
 931                                         }
 932                                 } catch (MalformedURLException e) {
 933                                         // Not an url
 934                                 }
 935                         }
 936
 937                         // refresh the cached file
 938                         if (url != null) {
 939                                 try {
 940                                         Instance.getCache().refresh(url, support, true);
 941                                 } catch (IOException e) {
 942                                         // woops, broken image
 943                                         url = null;
 944                                 }
 945                         }
 946                 }
 947
 948                 return url;
 949         }
 950
 951         /**
 952          * Open the input file that will be used through the support.
 953          *
 954          * @param source
 955          *            the source {@link URL}
 956          *
 957          * @return the {@link InputStream}
 958          *
 959          * @throws IOException
 960          *             in case of I/O error
 961          */
 962         protected InputStream openInput(URL source) throws IOException {
 963                 return Instance.getCache().open(source, this, false);
 964         }
 965
 966         /**
 967          * Reset the given {@link InputStream} and return it.
 968          *
 969          * @param in
 970          *            the {@link InputStream} to reset
 971          *
 972          * @return the same {@link InputStream} after reset
 973          */
 974         protected InputStream reset(InputStream in) {
 975                 try {
 976                         in.reset();
 977                 } catch (IOException e) {
 978                 }
 979                 return in;
 980         }
 981
 982         /**
 983          * Reset then return {@link BasicSupport#in}.
 984          *
 985          * @return {@link BasicSupport#in}
 986          */
 987         protected InputStream getInput() {
 988                 return reset(in);
 989         }
 990
 991         /**
 992          * Fix the author name if it is prefixed with some "by" {@link String}.
 993          *
 994          * @param author
 995          *            the author with a possible prefix
 996          *
 997          * @return the author without prefixes
 998          */
 999         protected String fixAuthor(String author) {
1000                 if (author != null) {
1001                         for (String suffix : new String[] { " ", ":" }) {
1002                                 for (String byString : Instance.getConfig()
1003                                                 .getString(Config.BYS).split(",")) {
1004                                         byString += suffix;
1005                                         if (author.toUpperCase().startsWith(byString.toUpperCase())) {
1006                                                 author = author.substring(byString.length()).trim();
1007                                         }
1008                                 }
1009                         }
1010
1011                         // Special case (without suffix):
1012                         if (author.startsWith("©")) {
1013                                 author = author.substring(1);
1014                         }
1015                 }
1016
1017                 return author;
1018         }
1019
1020         /**
1021          * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
1022          * and requotify them (i.e., separate them into QUOTE paragraphs and other
1023          * paragraphs (quotes or not)).
1024          *
1025          * @param para
1026          *            the paragraph to requotify (not necessarily a quote)
1027          *
1028          * @return the correctly (or so we hope) quotified paragraphs
1029          */
1030         protected List<Paragraph> requotify(Paragraph para) {
1031                 List<Paragraph> newParas = new ArrayList<Paragraph>();
1032
1033                 if (para.getType() == ParagraphType.QUOTE
1034                                 && para.getContent().length() > 2) {
1035                         String line = para.getContent();
1036                         boolean singleQ = line.startsWith("" + openQuote);
1037                         boolean doubleQ = line.startsWith("" + openDoubleQuote);
1038
1039                         // Do not try when more than one quote at a time
1040                         // (some stories are not easily readable if we do)
1041                         if (singleQ
1042                                         && line.indexOf(closeQuote, 1) < line
1043                                                         .lastIndexOf(closeQuote)) {
1044                                 newParas.add(para);
1045                                 return newParas;
1046                         }
1047                         if (doubleQ
1048                                         && line.indexOf(closeDoubleQuote, 1) < line
1049                                                         .lastIndexOf(closeDoubleQuote)) {
1050                                 newParas.add(para);
1051                                 return newParas;
1052                         }
1053                         //
1054
1055                         if (!singleQ && !doubleQ) {
1056                                 line = openDoubleQuote + line + closeDoubleQuote;
1057                                 newParas.add(new Paragraph(ParagraphType.QUOTE, line, para
1058                                                 .getWords()));
1059                         } else {
1060                                 char open = singleQ ? openQuote : openDoubleQuote;
1061                                 char close = singleQ ? closeQuote : closeDoubleQuote;
1062
1063                                 int posDot = -1;
1064                                 boolean inQuote = false;
1065                                 int i = 0;
1066                                 for (char car : line.toCharArray()) {
1067                                         if (car == open) {
1068                                                 inQuote = true;
1069                                         } else if (car == close) {
1070                                                 inQuote = false;
1071                                         } else if (car == '.' && !inQuote) {
1072                                                 posDot = i;
1073                                                 break;
1074                                         }
1075                                         i++;
1076                                 }
1077
1078                                 if (posDot >= 0) {
1079                                         String rest = line.substring(posDot + 1).trim();
1080                                         line = line.substring(0, posDot + 1).trim();
1081                                         long words = 1;
1082                                         for (char car : line.toCharArray()) {
1083                                                 if (car == ' ') {
1084                                                         words++;
1085                                                 }
1086                                         }
1087                                         newParas.add(new Paragraph(ParagraphType.QUOTE, line, words));
1088                                         if (!rest.isEmpty()) {
1089                                                 newParas.addAll(requotify(processPara(rest)));
1090                                         }
1091                                 } else {
1092                                         newParas.add(para);
1093                                 }
1094                         }
1095                 } else {
1096                         newParas.add(para);
1097                 }
1098
1099                 return newParas;
1100         }
1101
1102         /**
1103          * Process a {@link Paragraph} from a raw line of text.
1104          * <p>
1105          * Will also fix quotes and HTML encoding if needed.
1106          *
1107          * @param line
1108          *            the raw line
1109          *
1110          * @return the processed {@link Paragraph}
1111          */
1112         protected Paragraph processPara(String line) {
1113                 line = ifUnhtml(line).trim();
1114
1115                 boolean space = true;
1116                 boolean brk = true;
1117                 boolean quote = false;
1118                 boolean tentativeCloseQuote = false;
1119                 char prev = '\0';
1120                 int dashCount = 0;
1121                 long words = 1;
1122
1123                 StringBuilder builder = new StringBuilder();
1124                 for (char car : line.toCharArray()) {
1125                         if (car != '-') {
1126                                 if (dashCount > 0) {
1127                                         // dash, ndash and mdash: - – —
1128                                         // currently: always use mdash
1129                                         builder.append(dashCount == 1 ? '-' : '—');
1130                                 }
1131                                 dashCount = 0;
1132                         }
1133
1134                         if (tentativeCloseQuote) {
1135                                 tentativeCloseQuote = false;
1136                                 if (Character.isLetterOrDigit(car)) {
1137                                         builder.append("'");
1138                                 } else {
1139                                         // handle double-single quotes as double quotes
1140                                         if (prev == car) {
1141                                                 builder.append(closeDoubleQuote);
1142                                                 continue;
1143                                         } else {
1144                                                 builder.append(closeQuote);
1145                                         }
1146                                 }
1147                         }
1148
1149                         switch (car) {
1150                         case ' ': // note: unbreakable space
1151                         case ' ':
1152                         case '\t':
1153                         case '\n': // just in case
1154                         case '\r': // just in case
1155                                 if (builder.length() > 0
1156                                                 && builder.charAt(builder.length() - 1) != ' ') {
1157                                         words++;
1158                                 }
1159                                 builder.append(' ');
1160                                 break;
1161
1162                         case '\'':
1163                                 if (space || (brk && quote)) {
1164                                         quote = true;
1165                                         // handle double-single quotes as double quotes
1166                                         if (prev == car) {
1167                                                 builder.deleteCharAt(builder.length() - 1);
1168                                                 builder.append(openDoubleQuote);
1169                                         } else {
1170                                                 builder.append(openQuote);
1171                                         }
1172                                 } else if (prev == ' ' || prev == car) {
1173                                         // handle double-single quotes as double quotes
1174                                         if (prev == car) {
1175                                                 builder.deleteCharAt(builder.length() - 1);
1176                                                 builder.append(openDoubleQuote);
1177                                         } else {
1178                                                 builder.append(openQuote);
1179                                         }
1180                                 } else {
1181                                         // it is a quote ("I'm off") or a 'quote' ("This
1182                                         // 'good' restaurant"...)
1183                                         tentativeCloseQuote = true;
1184                                 }
1185                                 break;
1186
1187                         case '"':
1188                                 if (space || (brk && quote)) {
1189                                         quote = true;
1190                                         builder.append(openDoubleQuote);
1191                                 } else if (prev == ' ') {
1192                                         builder.append(openDoubleQuote);
1193                                 } else {
1194                                         builder.append(closeDoubleQuote);
1195                                 }
1196                                 break;
1197
1198                         case '-':
1199                                 if (space) {
1200                                         quote = true;
1201                                 } else {
1202                                         dashCount++;
1203                                 }
1204                                 space = false;
1205                                 break;
1206
1207                         case '*':
1208                         case '~':
1209                         case '/':
1210                         case '\\':
1211                         case '<':
1212                         case '>':
1213                         case '=':
1214                         case '+':
1215                         case '_':
1216                         case '–':
1217                         case '—':
1218                                 space = false;
1219                                 builder.append(car);
1220                                 break;
1221
1222                         case '‘':
1223                         case '`':
1224                         case '‹':
1225                         case '﹁':
1226                         case '〈':
1227                         case '「':
1228                                 if (space || (brk && quote)) {
1229                                         quote = true;
1230                                         builder.append(openQuote);
1231                                 } else {
1232                                         // handle double-single quotes as double quotes
1233                                         if (prev == car) {
1234                                                 builder.deleteCharAt(builder.length() - 1);
1235                                                 builder.append(openDoubleQuote);
1236                                         } else {
1237                                                 builder.append(openQuote);
1238                                         }
1239                                 }
1240                                 space = false;
1241                                 brk = false;
1242                                 break;
1243
1244                         case '’':
1245                         case '›':
1246                         case '﹂':
1247                         case '〉':
1248                         case '」':
1249                                 space = false;
1250                                 brk = false;
1251                                 // handle double-single quotes as double quotes
1252                                 if (prev == car) {
1253                                         builder.deleteCharAt(builder.length() - 1);
1254                                         builder.append(closeDoubleQuote);
1255                                 } else {
1256                                         builder.append(closeQuote);
1257                                 }
1258                                 break;
1259
1260                         case '«':
1261                         case '“':
1262                         case '﹃':
1263                         case '《':
1264                         case '『':
1265                                 if (space || (brk && quote)) {
1266                                         quote = true;
1267                                         builder.append(openDoubleQuote);
1268                                 } else {
1269                                         builder.append(openDoubleQuote);
1270                                 }
1271                                 space = false;
1272                                 brk = false;
1273                                 break;
1274
1275                         case '»':
1276                         case '”':
1277                         case '﹄':
1278                         case '》':
1279                         case '』':
1280                                 space = false;
1281                                 brk = false;
1282                                 builder.append(closeDoubleQuote);
1283                                 break;
1284
1285                         default:
1286                                 space = false;
1287                                 brk = false;
1288                                 builder.append(car);
1289                                 break;
1290                         }
1291
1292                         prev = car;
1293                 }
1294
1295                 if (tentativeCloseQuote) {
1296                         tentativeCloseQuote = false;
1297                         builder.append(closeQuote);
1298                 }
1299
1300                 line = builder.toString().trim();
1301
1302                 ParagraphType type = ParagraphType.NORMAL;
1303                 if (space) {
1304                         type = ParagraphType.BLANK;
1305                 } else if (brk) {
1306                         type = ParagraphType.BREAK;
1307                 } else if (quote) {
1308                         type = ParagraphType.QUOTE;
1309                 }
1310
1311                 return new Paragraph(type, line, words);
1312         }
1313
1314         /**
1315          * Remove the HTML from the input <b>if</b> {@link BasicSupport#isHtml()} is
1316          * true.
1317          *
1318          * @param input
1319          *            the input
1320          *
1321          * @return the no html version if needed
1322          */
1323         private String ifUnhtml(String input) {
1324                 if (isHtml() && input != null) {
1325                         return StringUtils.unhtml(input);
1326                 }
1327
1328                 return input;
1329         }
1330
1331         /**
1332          * Return a {@link BasicSupport} implementation supporting the given
1333          * resource if possible.
1334          *
1335          * @param url
1336          *            the story resource
1337          *
1338          * @return an implementation that supports it, or NULL
1339          */
1340         public static BasicSupport getSupport(URL url) {
1341                 if (url == null) {
1342                         return null;
1343                 }
1344
1345                 // TEXT and INFO_TEXT always support files (not URLs though)
1346                 for (SupportType type : SupportType.values()) {
1347                         if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1348                                 BasicSupport support = getSupport(type);
1349                                 if (support != null && support.supports(url)) {
1350                                         return support;
1351                                 }
1352                         }
1353                 }
1354
1355                 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
1356                                 SupportType.TEXT }) {
1357                         BasicSupport support = getSupport(type);
1358                         if (support != null && support.supports(url)) {
1359                                 return support;
1360                         }
1361                 }
1362
1363                 return null;
1364         }
1365
1366         /**
1367          * Return a {@link BasicSupport} implementation supporting the given type.
1368          *
1369          * @param type
1370          *            the type
1371          *
1372          * @return an implementation that supports it, or NULL
1373          */
1374         public static BasicSupport getSupport(SupportType type) {
1375                 switch (type) {
1376                 case EPUB:
1377                         return new Epub().setType(type);
1378                 case INFO_TEXT:
1379                         return new InfoText().setType(type);
1380                 case FIMFICTION:
1381                         return new Fimfiction().setType(type);
1382                 case FANFICTION:
1383                         return new Fanfiction().setType(type);
1384                 case TEXT:
1385                         return new Text().setType(type);
1386                 case MANGAFOX:
1387                         return new MangaFox().setType(type);
1388                 case E621:
1389                         return new E621().setType(type);
1390                 case YIFFSTAR:
1391                         return new YiffStar().setType(type);
1392                 case CBZ:
1393                         return new Cbz().setType(type);
1394                 case HTML:
1395                         return new Html().setType(type);
1396                 }
1397
1398                 return null;
1399         }
1400
1401         /**
1402          * Return the first line from the given input which correspond to the given
1403          * selectors.
1404          *
1405          * @param in
1406          *            the input
1407          * @param needle
1408          *            a string that must be found inside the target line (also
1409          *            supports "^" at start to say "only if it starts with" the
1410          *            needle)
1411          * @param relativeLine
1412          *            the line to return based upon the target line position (-1 =
1413          *            the line before, 0 = the target line...)
1414          *
1415          * @return the line
1416          */
1417         static String getLine(InputStream in, String needle, int relativeLine) {
1418                 return getLine(in, needle, relativeLine, true);
1419         }
1420
1421         /**
1422          * Return a line from the given input which correspond to the given
1423          * selectors.
1424          *
1425          * @param in
1426          *            the input
1427          * @param needle
1428          *            a string that must be found inside the target line (also
1429          *            supports "^" at start to say "only if it starts with" the
1430          *            needle)
1431          * @param relativeLine
1432          *            the line to return based upon the target line position (-1 =
1433          *            the line before, 0 = the target line...)
1434          * @param first
1435          *            takes the first result (as opposed to the last one, which will
1436          *            also always spend the input)
1437          *
1438          * @return the line
1439          */
1440         static String getLine(InputStream in, String needle, int relativeLine,
1441                         boolean first) {
1442                 String rep = null;
1443
1444                 try {
1445                         in.reset();
1446                 } catch (IOException e) {
1447                         Instance.syserr(e);
1448                 }
1449
1450                 List<String> lines = new ArrayList<String>();
1451                 @SuppressWarnings("resource")
1452                 Scanner scan = new Scanner(in, "UTF-8");
1453                 int index = -1;
1454                 scan.useDelimiter("\\n");
1455                 while (scan.hasNext()) {
1456                         lines.add(scan.next());
1457
1458                         if (index == -1) {
1459                                 if (needle.startsWith("^")) {
1460                                         if (lines.get(lines.size() - 1).startsWith(
1461                                                         needle.substring(1))) {
1462                                                 index = lines.size() - 1;
1463                                         }
1464
1465                                 } else {
1466                                         if (lines.get(lines.size() - 1).contains(needle)) {
1467                                                 index = lines.size() - 1;
1468                                         }
1469                                 }
1470                         }
1471
1472                         if (index >= 0 && index + relativeLine < lines.size()) {
1473                                 rep = lines.get(index + relativeLine);
1474                                 if (first) {
1475                                         break;
1476                                 }
1477                         }
1478                 }
1479
1480                 return rep;
1481         }
1482 }