src/be/nikiroo/fanfix/supported/BasicSupport.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.awt.image.BufferedImage;
   4 import java.io.BufferedReader;
   5 import java.io.ByteArrayInputStream;
   6 import java.io.File;
   7 import java.io.IOException;
   8 import java.io.InputStream;
   9 import java.io.InputStreamReader;
  10 import java.net.MalformedURLException;
  11 import java.net.URL;
  12 import java.util.ArrayList;
  13 import java.util.Date;
  14 import java.util.HashMap;
  15 import java.util.List;
  16 import java.util.Map;
  17 import java.util.Map.Entry;
  18 import java.util.Scanner;
  19
  20 import be.nikiroo.fanfix.Instance;
  21 import be.nikiroo.fanfix.bundles.Config;
  22 import be.nikiroo.fanfix.bundles.StringId;
  23 import be.nikiroo.fanfix.data.Chapter;
  24 import be.nikiroo.fanfix.data.MetaData;
  25 import be.nikiroo.fanfix.data.Paragraph;
  26 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
  27 import be.nikiroo.fanfix.data.Story;
  28 import be.nikiroo.utils.IOUtils;
  29 import be.nikiroo.utils.Progress;
  30 import be.nikiroo.utils.StringUtils;
  31
  32 /**
  33  * This class is the base class used by the other support classes. It can be
  34  * used outside of this package, and have static method that you can use to get
  35  * access to the correct support class.
  36  * <p>
  37  * It will be used with 'resources' (usually web pages or files).
  38  *
  39  * @author niki
  40  */
  41 public abstract class BasicSupport {
  42         /**
  43          * The supported input types for which we can get a {@link BasicSupport}
  44          * object.
  45          *
  46          * @author niki
  47          */
  48         public enum SupportType {
  49                 /** EPUB files created with this program */
  50                 EPUB,
  51                 /** Pure text file with some rules */
  52                 TEXT,
  53                 /** TEXT but with associated .info file */
  54                 INFO_TEXT,
  55                 /** My Little Pony fanfictions */
  56                 FIMFICTION,
  57                 /** Fanfictions from a lot of different universes */
  58                 FANFICTION,
  59                 /** Website with lots of Mangas */
  60                 MANGAFOX,
  61                 /** Furry website with comics support */
  62                 E621,
  63                 /** Furry website with stories */
  64                 YIFFSTAR,
  65                 /** Comics and images groups, mostly but not only NSFW */
  66                 E_HENTAI,
  67                 /** CBZ files */
  68                 CBZ,
  69                 /** HTML files */
  70                 HTML;
  71
  72                 /**
  73                  * A description of this support type (more information than the
  74                  * {@link BasicSupport#getSourceName()}).
  75                  *
  76                  * @return the description
  77                  */
  78                 public String getDesc() {
  79                         String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
  80                                         this.name());
  81
  82                         if (desc == null) {
  83                                 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
  84                         }
  85
  86                         return desc;
  87                 }
  88
  89                 /**
  90                  * The name of this support type (a short version).
  91                  *
  92                  * @return the name
  93                  */
  94                 public String getSourceName() {
  95                         BasicSupport support = BasicSupport.getSupport(this);
  96                         if (support != null) {
  97                                 return support.getSourceName();
  98                         }
  99
 100                         return null;
 101                 }
 102
 103                 @Override
 104                 public String toString() {
 105                         return super.toString().toLowerCase();
 106                 }
 107
 108                 /**
 109                  * Call {@link SupportType#valueOf(String)} after conversion to upper
 110                  * case.
 111                  *
 112                  * @param typeName
 113                  *            the possible type name
 114                  *
 115                  * @return NULL or the type
 116                  */
 117                 public static SupportType valueOfUC(String typeName) {
 118                         return SupportType.valueOf(typeName == null ? null : typeName
 119                                         .toUpperCase());
 120                 }
 121
 122                 /**
 123                  * Call {@link SupportType#valueOf(String)} after conversion to upper
 124                  * case but return NULL for NULL instead of raising exception.
 125                  *
 126                  * @param typeName
 127                  *            the possible type name
 128                  *
 129                  * @return NULL or the type
 130                  */
 131                 public static SupportType valueOfNullOkUC(String typeName) {
 132                         if (typeName == null) {
 133                                 return null;
 134                         }
 135
 136                         return SupportType.valueOfUC(typeName);
 137                 }
 138
 139                 /**
 140                  * Call {@link SupportType#valueOf(String)} after conversion to upper
 141                  * case but return NULL in case of error instead of raising an
 142                  * exception.
 143                  *
 144                  * @param typeName
 145                  *            the possible type name
 146                  *
 147                  * @return NULL or the type
 148                  */
 149                 public static SupportType valueOfAllOkUC(String typeName) {
 150                         try {
 151                                 return SupportType.valueOfUC(typeName);
 152                         } catch (Exception e) {
 153                                 return null;
 154                         }
 155                 }
 156         }
 157
 158         private InputStream in;
 159         private SupportType type;
 160         private URL currentReferer; // with only one 'r', as in 'HTTP'...
 161
 162         // quote chars
 163         private char openQuote = Instance.getTrans().getCharacter(
 164                         StringId.OPEN_SINGLE_QUOTE);
 165         private char closeQuote = Instance.getTrans().getCharacter(
 166                         StringId.CLOSE_SINGLE_QUOTE);
 167         private char openDoubleQuote = Instance.getTrans().getCharacter(
 168                         StringId.OPEN_DOUBLE_QUOTE);
 169         private char closeDoubleQuote = Instance.getTrans().getCharacter(
 170                         StringId.CLOSE_DOUBLE_QUOTE);
 171
 172         /**
 173          * The name of this support class.
 174          *
 175          * @return the name
 176          */
 177         protected abstract String getSourceName();
 178
 179         /**
 180          * Check if the given resource is supported by this {@link BasicSupport}.
 181          *
 182          * @param url
 183          *            the resource to check for
 184          *
 185          * @return TRUE if it is
 186          */
 187         protected abstract boolean supports(URL url);
 188
 189         /**
 190          * Return TRUE if the support will return HTML encoded content values for
 191          * the chapters content.
 192          *
 193          * @return TRUE for HTML
 194          */
 195         protected abstract boolean isHtml();
 196
 197         /**
 198          * Return the {@link MetaData} of this story.
 199          *
 200          * @param source
 201          *            the source of the story
 202          * @param in
 203          *            the input (the main resource)
 204          *
 205          * @return the associated {@link MetaData}
 206          *
 207          * @throws IOException
 208          *             in case of I/O error
 209          */
 210         protected abstract MetaData getMeta(URL source, InputStream in)
 211                         throws IOException;
 212
 213         /**
 214          * Return the story description.
 215          *
 216          * @param source
 217          *            the source of the story
 218          * @param in
 219          *            the input (the main resource)
 220          *
 221          * @return the description
 222          *
 223          * @throws IOException
 224          *             in case of I/O error
 225          */
 226         protected abstract String getDesc(URL source, InputStream in)
 227                         throws IOException;
 228
 229         /**
 230          * Return the list of chapters (name and resource).
 231          *
 232          * @param source
 233          *            the source of the story
 234          * @param in
 235          *            the input (the main resource)
 236          * @param pg
 237          *            the optional progress reporter
 238          *
 239          * @return the chapters
 240          *
 241          * @throws IOException
 242          *             in case of I/O error
 243          */
 244         protected abstract List<Entry<String, URL>> getChapters(URL source,
 245                         InputStream in, Progress pg) throws IOException;
 246
 247         /**
 248          * Return the content of the chapter (possibly HTML encoded, if
 249          * {@link BasicSupport#isHtml()} is TRUE).
 250          *
 251          * @param source
 252          *            the source of the story
 253          * @param in
 254          *            the input (the main resource)
 255          * @param number
 256          *            the chapter number
 257          * @param pg
 258          *            the optional progress reporter
 259          *
 260          * @return the content
 261          *
 262          * @throws IOException
 263          *             in case of I/O error
 264          */
 265         protected abstract String getChapterContent(URL source, InputStream in,
 266                         int number, Progress pg) throws IOException;
 267
 268         /**
 269          * Log into the support (can be a no-op depending upon the support).
 270          *
 271          * @throws IOException
 272          *             in case of I/O error
 273          */
 274         public void login() throws IOException {
 275
 276         }
 277
 278         /**
 279          * Return the list of cookies (values included) that must be used to
 280          * correctly fetch the resources.
 281          * <p>
 282          * You are expected to call the super method implementation if you override
 283          * it.
 284          *
 285          * @return the cookies
 286          *
 287          * @throws IOException
 288          *             in case of I/O error
 289          */
 290         public Map<String, String> getCookies() throws IOException {
 291                 return new HashMap<String, String>();
 292         }
 293
 294         /**
 295          * Return the canonical form of the main {@link URL}.
 296          *
 297          * @param source
 298          *            the source {@link URL}
 299          *
 300          * @return the canonical form of this {@link URL}
 301          *
 302          * @throws IOException
 303          *             in case of I/O error
 304          */
 305         public URL getCanonicalUrl(URL source) throws IOException {
 306                 return source;
 307         }
 308
 309         /**
 310          * Process the given story resource into a partially filled {@link Story}
 311          * object containing the name and metadata, except for the description.
 312          *
 313          * @param url
 314          *            the story resource
 315          *
 316          * @return the {@link Story}
 317          *
 318          * @throws IOException
 319          *             in case of I/O error
 320          */
 321         public Story processMeta(URL url) throws IOException {
 322                 return processMeta(url, true, false, null);
 323         }
 324
 325         /**
 326          * Process the given story resource into a partially filled {@link Story}
 327          * object containing the name and metadata.
 328          *
 329          * @param url
 330          *            the story resource
 331          * @param close
 332          *            close "this" and "in" when done
 333          * @param getDesc
 334          *            retrieve the description of the story, or not
 335          * @param pg
 336          *            the optional progress reporter
 337          *
 338          * @return the {@link Story}
 339          *
 340          * @throws IOException
 341          *             in case of I/O error
 342          */
 343         protected Story processMeta(URL url, boolean close, boolean getDesc,
 344                         Progress pg) throws IOException {
 345                 if (pg == null) {
 346                         pg = new Progress();
 347                 } else {
 348                         pg.setMinMax(0, 100);
 349                 }
 350
 351                 login();
 352                 pg.setProgress(10);
 353
 354                 url = getCanonicalUrl(url);
 355
 356                 setCurrentReferer(url);
 357
 358                 in = openInput(url);
 359                 if (in == null) {
 360                         return null;
 361                 }
 362
 363                 try {
 364                         preprocess(url, getInput());
 365                         pg.setProgress(30);
 366
 367                         Story story = new Story();
 368                         MetaData meta = getMeta(url, getInput());
 369                         if (meta.getCreationDate() == null
 370                                         || meta.getCreationDate().isEmpty()) {
 371                                 meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
 372                         }
 373                         story.setMeta(meta);
 374
 375                         pg.setProgress(50);
 376
 377                         if (meta != null && meta.getCover() == null) {
 378                                 meta.setCover(getDefaultCover(meta.getSubject()));
 379                         }
 380
 381                         pg.setProgress(60);
 382
 383                         if (getDesc) {
 384                                 String descChapterName = Instance.getTrans().getString(
 385                                                 StringId.DESCRIPTION);
 386                                 story.getMeta().setResume(
 387                                                 makeChapter(url, 0, descChapterName,
 388                                                                 getDesc(url, getInput()), null));
 389                         }
 390
 391                         pg.setProgress(100);
 392                         return story;
 393                 } finally {
 394                         if (close) {
 395                                 try {
 396                                         close();
 397                                 } catch (IOException e) {
 398                                         Instance.syserr(e);
 399                                 }
 400
 401                                 if (in != null) {
 402                                         in.close();
 403                                 }
 404                         }
 405
 406                         setCurrentReferer(null);
 407                 }
 408         }
 409
 410         /**
 411          * Process the given story resource into a fully filled {@link Story}
 412          * object.
 413          *
 414          * @param url
 415          *            the story resource
 416          * @param pg
 417          *            the optional progress reporter
 418          *
 419          * @return the {@link Story}
 420          *
 421          * @throws IOException
 422          *             in case of I/O error
 423          */
 424         public Story process(URL url, Progress pg) throws IOException {
 425                 if (pg == null) {
 426                         pg = new Progress();
 427                 } else {
 428                         pg.setMinMax(0, 100);
 429                 }
 430
 431                 url = getCanonicalUrl(url);
 432                 pg.setProgress(1);
 433                 try {
 434                         Progress pgMeta = new Progress();
 435                         pg.addProgress(pgMeta, 10);
 436                         Story story = processMeta(url, false, true, pgMeta);
 437                         if (!pgMeta.isDone()) {
 438                                 pgMeta.setProgress(pgMeta.getMax()); // 10%
 439                         }
 440
 441                         if (story == null) {
 442                                 pg.setProgress(90);
 443                                 return null;
 444                         }
 445
 446                         pg.setName("Retrieving " + story.getMeta().getTitle());
 447
 448                         setCurrentReferer(url);
 449
 450                         Progress pgGetChapters = new Progress();
 451                         pg.addProgress(pgGetChapters, 10);
 452                         story.setChapters(new ArrayList<Chapter>());
 453                         List<Entry<String, URL>> chapters = getChapters(url, getInput(),
 454                                         pgGetChapters);
 455                         if (!pgGetChapters.isDone()) {
 456                                 pgGetChapters.setProgress(pgGetChapters.getMax()); // 20%
 457                         }
 458
 459                         if (chapters != null) {
 460                                 Progress pgChaps = new Progress("Extracting chapters", 0,
 461                                                 chapters.size() * 300);
 462                                 pg.addProgress(pgChaps, 80);
 463
 464                                 long words = 0;
 465                                 int i = 1;
 466                                 for (Entry<String, URL> chap : chapters) {
 467                                         pgChaps.setName("Extracting chapter " + i);
 468                                         setCurrentReferer(chap.getValue());
 469                                         InputStream chapIn = Instance.getCache().open(
 470                                                         chap.getValue(), this, true);
 471                                         pgChaps.setProgress(i * 100);
 472                                         try {
 473                                                 Progress pgGetChapterContent = new Progress();
 474                                                 Progress pgMakeChapter = new Progress();
 475                                                 pgChaps.addProgress(pgGetChapterContent, 100);
 476                                                 pgChaps.addProgress(pgMakeChapter, 100);
 477
 478                                                 String content = getChapterContent(url, chapIn, i,
 479                                                                 pgGetChapterContent);
 480                                                 if (!pgGetChapterContent.isDone()) {
 481                                                         pgGetChapterContent.setProgress(pgGetChapterContent
 482                                                                         .getMax());
 483                                                 }
 484
 485                                                 Chapter cc = makeChapter(url, i, chap.getKey(),
 486                                                                 content, pgMakeChapter);
 487                                                 if (!pgMakeChapter.isDone()) {
 488                                                         pgMakeChapter.setProgress(pgMakeChapter.getMax());
 489                                                 }
 490
 491                                                 words += cc.getWords();
 492                                                 story.getChapters().add(cc);
 493                                                 if (story.getMeta() != null) {
 494                                                         story.getMeta().setWords(words);
 495                                                 }
 496                                         } finally {
 497                                                 chapIn.close();
 498                                         }
 499
 500                                         i++;
 501                                 }
 502
 503                                 pgChaps.setName("Extracting chapters");
 504                         } else {
 505                                 pg.setProgress(80);
 506                         }
 507
 508                         return story;
 509
 510                 } finally {
 511                         try {
 512                                 close();
 513                         } catch (IOException e) {
 514                                 Instance.syserr(e);
 515                         }
 516
 517                         if (in != null) {
 518                                 in.close();
 519                         }
 520
 521                         setCurrentReferer(null);
 522                 }
 523         }
 524
 525         /**
 526          * The support type.
 527          *
 528          * @return the type
 529          */
 530         public SupportType getType() {
 531                 return type;
 532         }
 533
 534         /**
 535          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 536          * the current {@link URL} we work on.
 537          *
 538          * @return the referer
 539          */
 540         public URL getCurrentReferer() {
 541                 return currentReferer;
 542         }
 543
 544         /**
 545          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 546          * the current {@link URL} we work on.
 547          *
 548          * @param currentReferer
 549          *            the new referer
 550          */
 551         protected void setCurrentReferer(URL currentReferer) {
 552                 this.currentReferer = currentReferer;
 553         }
 554
 555         /**
 556          * The support type.
 557          *
 558          * @param type
 559          *            the new type
 560          *
 561          * @return this
 562          */
 563         protected BasicSupport setType(SupportType type) {
 564                 this.type = type;
 565                 return this;
 566         }
 567
 568         /**
 569          * Prepare the support if needed before processing.
 570          *
 571          * @param source
 572          *            the source of the story
 573          * @param in
 574          *            the input (the main resource)
 575          *
 576          * @throws IOException
 577          *             on I/O error
 578          */
 579         protected void preprocess(URL source, InputStream in) throws IOException {
 580         }
 581
 582         /**
 583          * Now that we have processed the {@link Story}, close the resources if any.
 584          *
 585          * @throws IOException
 586          *             on I/O error
 587          */
 588         protected void close() throws IOException {
 589         }
 590
 591         /**
 592          * Create a {@link Chapter} object from the given information, formatting
 593          * the content as it should be.
 594          *
 595          * @param source
 596          *            the source of the story
 597          * @param number
 598          *            the chapter number
 599          * @param name
 600          *            the chapter name
 601          * @param content
 602          *            the chapter content
 603          * @param pg
 604          *            the optional progress reporter
 605          *
 606          * @return the {@link Chapter}
 607          *
 608          * @throws IOException
 609          *             in case of I/O error
 610          */
 611         protected Chapter makeChapter(URL source, int number, String name,
 612                         String content, Progress pg) throws IOException {
 613                 // Chapter name: process it correctly, then remove the possible
 614                 // redundant "Chapter x: " in front of it, or "-" (as in
 615                 // "Chapter 5: - Fun!" after the ": " was automatically added)
 616                 String chapterName = processPara(name).getContent().trim();
 617                 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
 618                                 .split(",")) {
 619                         String chapterWord = Instance.getConfig().getStringX(
 620                                         Config.CHAPTER, lang);
 621                         if (chapterName.startsWith(chapterWord)) {
 622                                 chapterName = chapterName.substring(chapterWord.length())
 623                                                 .trim();
 624                                 break;
 625                         }
 626                 }
 627
 628                 if (chapterName.startsWith(Integer.toString(number))) {
 629                         chapterName = chapterName.substring(
 630                                         Integer.toString(number).length()).trim();
 631                 }
 632
 633                 while (chapterName.startsWith(":") || chapterName.startsWith("-")) {
 634                         chapterName = chapterName.substring(1).trim();
 635                 }
 636                 //
 637
 638                 Chapter chap = new Chapter(number, chapterName);
 639
 640                 if (content != null) {
 641                         List<Paragraph> paras = makeParagraphs(source, content, pg);
 642                         long words = 0;
 643                         for (Paragraph para : paras) {
 644                                 words += para.getWords();
 645                         }
 646                         chap.setParagraphs(paras);
 647                         chap.setWords(words);
 648                 }
 649
 650                 return chap;
 651
 652         }
 653
 654         /**
 655          * Convert the given content into {@link Paragraph}s.
 656          *
 657          * @param source
 658          *            the source URL of the story
 659          * @param content
 660          *            the textual content
 661          * @param pg
 662          *            the optional progress reporter
 663          *
 664          * @return the {@link Paragraph}s
 665          *
 666          * @throws IOException
 667          *             in case of I/O error
 668          */
 669         protected List<Paragraph> makeParagraphs(URL source, String content,
 670                         Progress pg) throws IOException {
 671                 if (pg == null) {
 672                         pg = new Progress();
 673                 }
 674
 675                 if (isHtml()) {
 676                         // Special <HR> processing:
 677                         content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
 678                                         "<br/>* * *<br/>");
 679                 }
 680
 681                 List<Paragraph> paras = new ArrayList<Paragraph>();
 682
 683                 if (content != null && !content.trim().isEmpty()) {
 684                         if (isHtml()) {
 685                                 String[] tab = content.split("(<p>|</p>|<br>|<br/>)");
 686                                 pg.setMinMax(0, tab.length);
 687                                 int i = 1;
 688                                 for (String line : tab) {
 689                                         if (line.startsWith("[") && line.endsWith("]")) {
 690                                                 pg.setName("Extracting image " + i);
 691                                         }
 692                                         paras.add(makeParagraph(source, line.trim()));
 693                                         pg.setProgress(i++);
 694                                 }
 695                                 pg.setName(null);
 696                         } else {
 697                                 List<String> lines = new ArrayList<String>();
 698                                 BufferedReader buff = null;
 699                                 try {
 700                                         buff = new BufferedReader(
 701                                                         new InputStreamReader(new ByteArrayInputStream(
 702                                                                         content.getBytes("UTF-8")), "UTF-8"));
 703                                         for (String line = buff.readLine(); line != null; line = buff
 704                                                         .readLine()) {
 705                                                 lines.add(line.trim());
 706                                         }
 707                                 } finally {
 708                                         if (buff != null) {
 709                                                 buff.close();
 710                                         }
 711                                 }
 712
 713                                 pg.setMinMax(0, lines.size());
 714                                 int i = 0;
 715                                 for (String line : lines) {
 716                                         if (line.startsWith("[") && line.endsWith("]")) {
 717                                                 pg.setName("Extracting image " + i);
 718                                         }
 719                                         paras.add(makeParagraph(source, line));
 720                                         pg.setProgress(i++);
 721                                 }
 722                                 pg.setName(null);
 723                         }
 724
 725                         // Check quotes for "bad" format
 726                         List<Paragraph> newParas = new ArrayList<Paragraph>();
 727                         for (Paragraph para : paras) {
 728                                 newParas.addAll(requotify(para));
 729                         }
 730                         paras = newParas;
 731
 732                         // Remove double blanks/brks
 733                         fixBlanksBreaks(paras);
 734                 }
 735
 736                 return paras;
 737         }
 738
 739         /**
 740          * Convert the given line into a single {@link Paragraph}.
 741          *
 742          * @param source
 743          *            the source URL of the story
 744          * @param line
 745          *            the textual content of the paragraph
 746          *
 747          * @return the {@link Paragraph}
 748          */
 749         private Paragraph makeParagraph(URL source, String line) {
 750                 URL image = null;
 751                 if (line.startsWith("[") && line.endsWith("]")) {
 752                         image = getImageUrl(this, source,
 753                                         line.substring(1, line.length() - 1).trim());
 754                 }
 755
 756                 if (image != null) {
 757                         return new Paragraph(image);
 758                 } else {
 759                         return processPara(line);
 760                 }
 761         }
 762
 763         /**
 764          * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
 765          * those {@link Paragraph}s.
 766          * <p>
 767          * The resulting list will not contain a starting or trailing blank/break
 768          * nor 2 blanks or breaks following each other.
 769          *
 770          * @param paras
 771          *            the list of {@link Paragraph}s to fix
 772          */
 773         protected void fixBlanksBreaks(List<Paragraph> paras) {
 774                 boolean space = false;
 775                 boolean brk = true;
 776                 for (int i = 0; i < paras.size(); i++) {
 777                         Paragraph para = paras.get(i);
 778                         boolean thisSpace = para.getType() == ParagraphType.BLANK;
 779                         boolean thisBrk = para.getType() == ParagraphType.BREAK;
 780
 781                         if (i > 0 && space && thisBrk) {
 782                                 paras.remove(i - 1);
 783                                 i--;
 784                         } else if ((space || brk) && (thisSpace || thisBrk)) {
 785                                 paras.remove(i);
 786                                 i--;
 787                         }
 788
 789                         space = thisSpace;
 790                         brk = thisBrk;
 791                 }
 792
 793                 // Remove blank/brk at start
 794                 if (paras.size() > 0
 795                                 && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
 796                                                 0).getType() == ParagraphType.BREAK)) {
 797                         paras.remove(0);
 798                 }
 799
 800                 // Remove blank/brk at end
 801                 int last = paras.size() - 1;
 802                 if (paras.size() > 0
 803                                 && (paras.get(last).getType() == ParagraphType.BLANK || paras
 804                                                 .get(last).getType() == ParagraphType.BREAK)) {
 805                         paras.remove(last);
 806                 }
 807         }
 808
 809         /**
 810          * Get the default cover related to this subject (see <tt>.info</tt> files).
 811          *
 812          * @param subject
 813          *            the subject
 814          *
 815          * @return the cover if any, or NULL
 816          */
 817         static BufferedImage getDefaultCover(String subject) {
 818                 if (subject != null && !subject.isEmpty()
 819                                 && Instance.getCoverDir() != null) {
 820                         try {
 821                                 File fileCover = new File(Instance.getCoverDir(), subject);
 822                                 return getImage(null, fileCover.toURI().toURL(), subject);
 823                         } catch (MalformedURLException e) {
 824                         }
 825                 }
 826
 827                 return null;
 828         }
 829
 830         /**
 831          * Return the list of supported image extensions.
 832          *
 833          * @param emptyAllowed
 834          *            TRUE to allow an empty extension on first place, which can be
 835          *            used when you may already have an extension in your input but
 836          *            are not sure about it
 837          *
 838          * @return the extensions
 839          */
 840         static String[] getImageExt(boolean emptyAllowed) {
 841                 if (emptyAllowed) {
 842                         return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 843                 } else {
 844                         return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 845                 }
 846         }
 847
 848         /**
 849          * Check if the given resource can be a local image or a remote image, then
 850          * refresh the cache with it if it is.
 851          *
 852          * @param source
 853          *            the story source
 854          * @param line
 855          *            the resource to check
 856          *
 857          * @return the image if found, or NULL
 858          *
 859          */
 860         static BufferedImage getImage(BasicSupport support, URL source, String line) {
 861                 URL url = getImageUrl(support, source, line);
 862                 if (url != null) {
 863                         InputStream in = null;
 864                         try {
 865                                 in = Instance.getCache().open(url, getSupport(url), true);
 866                                 return IOUtils.toImage(in);
 867                         } catch (IOException e) {
 868                         } finally {
 869                                 if (in != null) {
 870                                         try {
 871                                                 in.close();
 872                                         } catch (IOException e) {
 873                                         }
 874                                 }
 875                         }
 876                 }
 877
 878                 return null;
 879         }
 880
 881         /**
 882          * Check if the given resource can be a local image or a remote image, then
 883          * refresh the cache with it if it is.
 884          *
 885          * @param source
 886          *            the story source
 887          * @param line
 888          *            the resource to check
 889          *
 890          * @return the image URL if found, or NULL
 891          *
 892          */
 893         static URL getImageUrl(BasicSupport support, URL source, String line) {
 894                 URL url = null;
 895
 896                 if (line != null) {
 897                         // try for files
 898                         if (source != null) {
 899                                 try {
 900
 901                                         String relPath = null;
 902                                         String absPath = null;
 903                                         try {
 904                                                 String path = new File(source.getFile()).getParent();
 905                                                 relPath = new File(new File(path), line.trim())
 906                                                                 .getAbsolutePath();
 907                                         } catch (Exception e) {
 908                                                 // Cannot be converted to path (one possibility to take
 909                                                 // into account: absolute path on Windows)
 910                                         }
 911                                         try {
 912                                                 absPath = new File(line.trim()).getAbsolutePath();
 913                                         } catch (Exception e) {
 914                                                 // Cannot be converted to path (at all)
 915                                         }
 916
 917                                         for (String ext : getImageExt(true)) {
 918                                                 if (absPath != null && new File(absPath + ext).exists()) {
 919                                                         url = new File(absPath + ext).toURI().toURL();
 920                                                 } else if (relPath != null
 921                                                                 && new File(relPath + ext).exists()) {
 922                                                         url = new File(relPath + ext).toURI().toURL();
 923                                                 }
 924                                         }
 925                                 } catch (Exception e) {
 926                                         // Should not happen since we control the correct arguments
 927                                 }
 928                         }
 929
 930                         if (url == null) {
 931                                 // try for URLs
 932                                 try {
 933                                         for (String ext : getImageExt(true)) {
 934                                                 if (Instance.getCache().check(new URL(line + ext))) {
 935                                                         url = new URL(line + ext);
 936                                                         break;
 937                                                 }
 938                                         }
 939
 940                                         // try out of cache
 941                                         if (url == null) {
 942                                                 for (String ext : getImageExt(true)) {
 943                                                         try {
 944                                                                 url = new URL(line + ext);
 945                                                                 Instance.getCache().refresh(url, support, true);
 946                                                                 break;
 947                                                         } catch (IOException e) {
 948                                                                 // no image with this ext
 949                                                                 url = null;
 950                                                         }
 951                                                 }
 952                                         }
 953                                 } catch (MalformedURLException e) {
 954                                         // Not an url
 955                                 }
 956                         }
 957
 958                         // refresh the cached file
 959                         if (url != null) {
 960                                 try {
 961                                         Instance.getCache().refresh(url, support, true);
 962                                 } catch (IOException e) {
 963                                         // woops, broken image
 964                                         url = null;
 965                                 }
 966                         }
 967                 }
 968
 969                 return url;
 970         }
 971
 972         /**
 973          * Open the input file that will be used through the support.
 974          *
 975          * @param source
 976          *            the source {@link URL}
 977          *
 978          * @return the {@link InputStream}
 979          *
 980          * @throws IOException
 981          *             in case of I/O error
 982          */
 983         protected InputStream openInput(URL source) throws IOException {
 984                 return Instance.getCache().open(source, this, false);
 985         }
 986
 987         /**
 988          * Reset the given {@link InputStream} and return it.
 989          *
 990          * @param in
 991          *            the {@link InputStream} to reset
 992          *
 993          * @return the same {@link InputStream} after reset
 994          */
 995         protected InputStream reset(InputStream in) {
 996                 try {
 997                         in.reset();
 998                 } catch (IOException e) {
 999                 }
1000                 return in;
1001         }
1002
1003         /**
1004          * Reset then return {@link BasicSupport#in}.
1005          *
1006          * @return {@link BasicSupport#in}
1007          */
1008         protected InputStream getInput() {
1009                 return reset(in);
1010         }
1011
1012         /**
1013          * Fix the author name if it is prefixed with some "by" {@link String}.
1014          *
1015          * @param author
1016          *            the author with a possible prefix
1017          *
1018          * @return the author without prefixes
1019          */
1020         protected String fixAuthor(String author) {
1021                 if (author != null) {
1022                         for (String suffix : new String[] { " ", ":" }) {
1023                                 for (String byString : Instance.getConfig()
1024                                                 .getString(Config.BYS).split(",")) {
1025                                         byString += suffix;
1026                                         if (author.toUpperCase().startsWith(byString.toUpperCase())) {
1027                                                 author = author.substring(byString.length()).trim();
1028                                         }
1029                                 }
1030                         }
1031
1032                         // Special case (without suffix):
1033                         if (author.startsWith("©")) {
1034                                 author = author.substring(1);
1035                         }
1036                 }
1037
1038                 return author;
1039         }
1040
1041         /**
1042          * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
1043          * and requotify them (i.e., separate them into QUOTE paragraphs and other
1044          * paragraphs (quotes or not)).
1045          *
1046          * @param para
1047          *            the paragraph to requotify (not necessarily a quote)
1048          *
1049          * @return the correctly (or so we hope) quotified paragraphs
1050          */
1051         protected List<Paragraph> requotify(Paragraph para) {
1052                 List<Paragraph> newParas = new ArrayList<Paragraph>();
1053
1054                 if (para.getType() == ParagraphType.QUOTE
1055                                 && para.getContent().length() > 2) {
1056                         String line = para.getContent();
1057                         boolean singleQ = line.startsWith("" + openQuote);
1058                         boolean doubleQ = line.startsWith("" + openDoubleQuote);
1059
1060                         // Do not try when more than one quote at a time
1061                         // (some stories are not easily readable if we do)
1062                         if (singleQ
1063                                         && line.indexOf(closeQuote, 1) < line
1064                                                         .lastIndexOf(closeQuote)) {
1065                                 newParas.add(para);
1066                                 return newParas;
1067                         }
1068                         if (doubleQ
1069                                         && line.indexOf(closeDoubleQuote, 1) < line
1070                                                         .lastIndexOf(closeDoubleQuote)) {
1071                                 newParas.add(para);
1072                                 return newParas;
1073                         }
1074                         //
1075
1076                         if (!singleQ && !doubleQ) {
1077                                 line = openDoubleQuote + line + closeDoubleQuote;
1078                                 newParas.add(new Paragraph(ParagraphType.QUOTE, line, para
1079                                                 .getWords()));
1080                         } else {
1081                                 char open = singleQ ? openQuote : openDoubleQuote;
1082                                 char close = singleQ ? closeQuote : closeDoubleQuote;
1083
1084                                 int posDot = -1;
1085                                 boolean inQuote = false;
1086                                 int i = 0;
1087                                 for (char car : line.toCharArray()) {
1088                                         if (car == open) {
1089                                                 inQuote = true;
1090                                         } else if (car == close) {
1091                                                 inQuote = false;
1092                                         } else if (car == '.' && !inQuote) {
1093                                                 posDot = i;
1094                                                 break;
1095                                         }
1096                                         i++;
1097                                 }
1098
1099                                 if (posDot >= 0) {
1100                                         String rest = line.substring(posDot + 1).trim();
1101                                         line = line.substring(0, posDot + 1).trim();
1102                                         long words = 1;
1103                                         for (char car : line.toCharArray()) {
1104                                                 if (car == ' ') {
1105                                                         words++;
1106                                                 }
1107                                         }
1108                                         newParas.add(new Paragraph(ParagraphType.QUOTE, line, words));
1109                                         if (!rest.isEmpty()) {
1110                                                 newParas.addAll(requotify(processPara(rest)));
1111                                         }
1112                                 } else {
1113                                         newParas.add(para);
1114                                 }
1115                         }
1116                 } else {
1117                         newParas.add(para);
1118                 }
1119
1120                 return newParas;
1121         }
1122
1123         /**
1124          * Process a {@link Paragraph} from a raw line of text.
1125          * <p>
1126          * Will also fix quotes and HTML encoding if needed.
1127          *
1128          * @param line
1129          *            the raw line
1130          *
1131          * @return the processed {@link Paragraph}
1132          */
1133         protected Paragraph processPara(String line) {
1134                 line = ifUnhtml(line).trim();
1135
1136                 boolean space = true;
1137                 boolean brk = true;
1138                 boolean quote = false;
1139                 boolean tentativeCloseQuote = false;
1140                 char prev = '\0';
1141                 int dashCount = 0;
1142                 long words = 1;
1143
1144                 StringBuilder builder = new StringBuilder();
1145                 for (char car : line.toCharArray()) {
1146                         if (car != '-') {
1147                                 if (dashCount > 0) {
1148                                         // dash, ndash and mdash: - – —
1149                                         // currently: always use mdash
1150                                         builder.append(dashCount == 1 ? '-' : '—');
1151                                 }
1152                                 dashCount = 0;
1153                         }
1154
1155                         if (tentativeCloseQuote) {
1156                                 tentativeCloseQuote = false;
1157                                 if (Character.isLetterOrDigit(car)) {
1158                                         builder.append("'");
1159                                 } else {
1160                                         // handle double-single quotes as double quotes
1161                                         if (prev == car) {
1162                                                 builder.append(closeDoubleQuote);
1163                                                 continue;
1164                                         } else {
1165                                                 builder.append(closeQuote);
1166                                         }
1167                                 }
1168                         }
1169
1170                         switch (car) {
1171                         case ' ': // note: unbreakable space
1172                         case ' ':
1173                         case '\t':
1174                         case '\n': // just in case
1175                         case '\r': // just in case
1176                                 if (builder.length() > 0
1177                                                 && builder.charAt(builder.length() - 1) != ' ') {
1178                                         words++;
1179                                 }
1180                                 builder.append(' ');
1181                                 break;
1182
1183                         case '\'':
1184                                 if (space || (brk && quote)) {
1185                                         quote = true;
1186                                         // handle double-single quotes as double quotes
1187                                         if (prev == car) {
1188                                                 builder.deleteCharAt(builder.length() - 1);
1189                                                 builder.append(openDoubleQuote);
1190                                         } else {
1191                                                 builder.append(openQuote);
1192                                         }
1193                                 } else if (prev == ' ' || prev == car) {
1194                                         // handle double-single quotes as double quotes
1195                                         if (prev == car) {
1196                                                 builder.deleteCharAt(builder.length() - 1);
1197                                                 builder.append(openDoubleQuote);
1198                                         } else {
1199                                                 builder.append(openQuote);
1200                                         }
1201                                 } else {
1202                                         // it is a quote ("I'm off") or a 'quote' ("This
1203                                         // 'good' restaurant"...)
1204                                         tentativeCloseQuote = true;
1205                                 }
1206                                 break;
1207
1208                         case '"':
1209                                 if (space || (brk && quote)) {
1210                                         quote = true;
1211                                         builder.append(openDoubleQuote);
1212                                 } else if (prev == ' ') {
1213                                         builder.append(openDoubleQuote);
1214                                 } else {
1215                                         builder.append(closeDoubleQuote);
1216                                 }
1217                                 break;
1218
1219                         case '-':
1220                                 if (space) {
1221                                         quote = true;
1222                                 } else {
1223                                         dashCount++;
1224                                 }
1225                                 space = false;
1226                                 break;
1227
1228                         case '*':
1229                         case '~':
1230                         case '/':
1231                         case '\\':
1232                         case '<':
1233                         case '>':
1234                         case '=':
1235                         case '+':
1236                         case '_':
1237                         case '–':
1238                         case '—':
1239                                 space = false;
1240                                 builder.append(car);
1241                                 break;
1242
1243                         case '‘':
1244                         case '`':
1245                         case '‹':
1246                         case '﹁':
1247                         case '〈':
1248                         case '「':
1249                                 if (space || (brk && quote)) {
1250                                         quote = true;
1251                                         builder.append(openQuote);
1252                                 } else {
1253                                         // handle double-single quotes as double quotes
1254                                         if (prev == car) {
1255                                                 builder.deleteCharAt(builder.length() - 1);
1256                                                 builder.append(openDoubleQuote);
1257                                         } else {
1258                                                 builder.append(openQuote);
1259                                         }
1260                                 }
1261                                 space = false;
1262                                 brk = false;
1263                                 break;
1264
1265                         case '’':
1266                         case '›':
1267                         case '﹂':
1268                         case '〉':
1269                         case '」':
1270                                 space = false;
1271                                 brk = false;
1272                                 // handle double-single quotes as double quotes
1273                                 if (prev == car) {
1274                                         builder.deleteCharAt(builder.length() - 1);
1275                                         builder.append(closeDoubleQuote);
1276                                 } else {
1277                                         builder.append(closeQuote);
1278                                 }
1279                                 break;
1280
1281                         case '«':
1282                         case '“':
1283                         case '﹃':
1284                         case '《':
1285                         case '『':
1286                                 if (space || (brk && quote)) {
1287                                         quote = true;
1288                                         builder.append(openDoubleQuote);
1289                                 } else {
1290                                         builder.append(openDoubleQuote);
1291                                 }
1292                                 space = false;
1293                                 brk = false;
1294                                 break;
1295
1296                         case '»':
1297                         case '”':
1298                         case '﹄':
1299                         case '》':
1300                         case '』':
1301                                 space = false;
1302                                 brk = false;
1303                                 builder.append(closeDoubleQuote);
1304                                 break;
1305
1306                         default:
1307                                 space = false;
1308                                 brk = false;
1309                                 builder.append(car);
1310                                 break;
1311                         }
1312
1313                         prev = car;
1314                 }
1315
1316                 if (tentativeCloseQuote) {
1317                         tentativeCloseQuote = false;
1318                         builder.append(closeQuote);
1319                 }
1320
1321                 line = builder.toString().trim();
1322
1323                 ParagraphType type = ParagraphType.NORMAL;
1324                 if (space) {
1325                         type = ParagraphType.BLANK;
1326                 } else if (brk) {
1327                         type = ParagraphType.BREAK;
1328                 } else if (quote) {
1329                         type = ParagraphType.QUOTE;
1330                 }
1331
1332                 return new Paragraph(type, line, words);
1333         }
1334
1335         /**
1336          * Remove the HTML from the input <b>if</b> {@link BasicSupport#isHtml()} is
1337          * true.
1338          *
1339          * @param input
1340          *            the input
1341          *
1342          * @return the no html version if needed
1343          */
1344         private String ifUnhtml(String input) {
1345                 if (isHtml() && input != null) {
1346                         return StringUtils.unhtml(input);
1347                 }
1348
1349                 return input;
1350         }
1351
1352         /**
1353          * Return a {@link BasicSupport} implementation supporting the given
1354          * resource if possible.
1355          *
1356          * @param url
1357          *            the story resource
1358          *
1359          * @return an implementation that supports it, or NULL
1360          */
1361         public static BasicSupport getSupport(URL url) {
1362                 if (url == null) {
1363                         return null;
1364                 }
1365
1366                 // TEXT and INFO_TEXT always support files (not URLs though)
1367                 for (SupportType type : SupportType.values()) {
1368                         if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1369                                 BasicSupport support = getSupport(type);
1370                                 if (support != null && support.supports(url)) {
1371                                         return support;
1372                                 }
1373                         }
1374                 }
1375
1376                 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
1377                                 SupportType.TEXT }) {
1378                         BasicSupport support = getSupport(type);
1379                         if (support != null && support.supports(url)) {
1380                                 return support;
1381                         }
1382                 }
1383
1384                 return null;
1385         }
1386
1387         /**
1388          * Return a {@link BasicSupport} implementation supporting the given type.
1389          *
1390          * @param type
1391          *            the type
1392          *
1393          * @return an implementation that supports it, or NULL
1394          */
1395         public static BasicSupport getSupport(SupportType type) {
1396                 switch (type) {
1397                 case EPUB:
1398                         return new Epub().setType(type);
1399                 case INFO_TEXT:
1400                         return new InfoText().setType(type);
1401                 case FIMFICTION:
1402                         return new Fimfiction().setType(type);
1403                 case FANFICTION:
1404                         return new Fanfiction().setType(type);
1405                 case TEXT:
1406                         return new Text().setType(type);
1407                 case MANGAFOX:
1408                         return new MangaFox().setType(type);
1409                 case E621:
1410                         return new E621().setType(type);
1411                 case YIFFSTAR:
1412                         return new YiffStar().setType(type);
1413                 case E_HENTAI:
1414                         return new EHentai().setType(type);
1415                 case CBZ:
1416                         return new Cbz().setType(type);
1417                 case HTML:
1418                         return new Html().setType(type);
1419                 }
1420
1421                 return null;
1422         }
1423
1424         /**
1425          * Return the first line from the given input which correspond to the given
1426          * selectors.
1427          *
1428          * @param in
1429          *            the input
1430          * @param needle
1431          *            a string that must be found inside the target line (also
1432          *            supports "^" at start to say "only if it starts with" the
1433          *            needle)
1434          * @param relativeLine
1435          *            the line to return based upon the target line position (-1 =
1436          *            the line before, 0 = the target line...)
1437          *
1438          * @return the line
1439          */
1440         static String getLine(InputStream in, String needle, int relativeLine) {
1441                 return getLine(in, needle, relativeLine, true);
1442         }
1443
1444         /**
1445          * Return a line from the given input which correspond to the given
1446          * selectors.
1447          *
1448          * @param in
1449          *            the input
1450          * @param needle
1451          *            a string that must be found inside the target line (also
1452          *            supports "^" at start to say "only if it starts with" the
1453          *            needle)
1454          * @param relativeLine
1455          *            the line to return based upon the target line position (-1 =
1456          *            the line before, 0 = the target line...)
1457          * @param first
1458          *            takes the first result (as opposed to the last one, which will
1459          *            also always spend the input)
1460          *
1461          * @return the line
1462          */
1463         static String getLine(InputStream in, String needle, int relativeLine,
1464                         boolean first) {
1465                 String rep = null;
1466
1467                 try {
1468                         in.reset();
1469                 } catch (IOException e) {
1470                         Instance.syserr(e);
1471                 }
1472
1473                 List<String> lines = new ArrayList<String>();
1474                 @SuppressWarnings("resource")
1475                 Scanner scan = new Scanner(in, "UTF-8");
1476                 int index = -1;
1477                 scan.useDelimiter("\\n");
1478                 while (scan.hasNext()) {
1479                         lines.add(scan.next());
1480
1481                         if (index == -1) {
1482                                 if (needle.startsWith("^")) {
1483                                         if (lines.get(lines.size() - 1).startsWith(
1484                                                         needle.substring(1))) {
1485                                                 index = lines.size() - 1;
1486                                         }
1487
1488                                 } else {
1489                                         if (lines.get(lines.size() - 1).contains(needle)) {
1490                                                 index = lines.size() - 1;
1491                                         }
1492                                 }
1493                         }
1494
1495                         if (index >= 0 && index + relativeLine < lines.size()) {
1496                                 rep = lines.get(index + relativeLine);
1497                                 if (first) {
1498                                         break;
1499                                 }
1500                         }
1501                 }
1502
1503                 return rep;
1504         }
1505
1506         /**
1507          * Return the text between the key and the endKey (and optional subKey can
1508          * be passed, in this case we will look for the key first, then take the
1509          * text between the subKey and the endKey).
1510          * <p>
1511          * Will only match the first line with the given key if more than one are
1512          * possible. Which also means that if the subKey or endKey is not found on
1513          * that line, NULL will be returned.
1514          *
1515          * @param in
1516          *            the input
1517          * @param key
1518          *            the key to match (also supports "^" at start to say
1519          *            "only if it starts with" the key)
1520          * @param subKey
1521          *            the sub key or NULL if none
1522          * @param endKey
1523          *            the end key or NULL for "up to the end"
1524          * @return the text or NULL if not found
1525          */
1526         static String getKeyLine(InputStream in, String key, String subKey,
1527                         String endKey) {
1528                 String result = null;
1529
1530                 String line = getLine(in, key, 0);
1531                 if (line != null && line.contains(key)) {
1532                         line = line.substring(line.indexOf(key) + key.length());
1533                         if (subKey == null || subKey.isEmpty() || line.contains(subKey)) {
1534                                 if (subKey != null) {
1535                                         line = line.substring(line.indexOf(subKey)
1536                                                         + subKey.length());
1537                                 }
1538                                 if (endKey == null || line.contains(endKey)) {
1539                                         if (endKey != null) {
1540                                                 line = line.substring(0, line.indexOf(endKey));
1541                                                 result = line;
1542                                         }
1543                                 }
1544                         }
1545                 }
1546
1547                 return result;
1548         }
1549 }