supported/BasicSupport.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.awt.image.BufferedImage;
   4 import java.io.BufferedReader;
   5 import java.io.ByteArrayInputStream;
   6 import java.io.File;
   7 import java.io.IOException;
   8 import java.io.InputStream;
   9 import java.io.InputStreamReader;
  10 import java.net.MalformedURLException;
  11 import java.net.URL;
  12 import java.util.ArrayList;
  13 import java.util.HashMap;
  14 import java.util.List;
  15 import java.util.Map;
  16 import java.util.Map.Entry;
  17 import java.util.Scanner;
  18
  19 import be.nikiroo.fanfix.Instance;
  20 import be.nikiroo.fanfix.bundles.Config;
  21 import be.nikiroo.fanfix.bundles.StringId;
  22 import be.nikiroo.fanfix.data.Chapter;
  23 import be.nikiroo.fanfix.data.MetaData;
  24 import be.nikiroo.fanfix.data.Paragraph;
  25 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
  26 import be.nikiroo.fanfix.data.Story;
  27 import be.nikiroo.utils.IOUtils;
  28 import be.nikiroo.utils.Progress;
  29 import be.nikiroo.utils.StringUtils;
  30
  31 /**
 * This class is the base class used by the other support classes. It can be
 * used outside of this package, and has static methods that you can use to
 * get access to the correct support class.
  35  * <p>
  36  * It will be used with 'resources' (usually web pages or files).
  37  *
  38  * @author niki
  39  */
  40 public abstract class BasicSupport {
  41         /**
  42          * The supported input types for which we can get a {@link BasicSupport}
  43          * object.
  44          *
  45          * @author niki
  46          */
  47         public enum SupportType {
  48                 /** EPUB files created with this program */
  49                 EPUB,
  50                 /** Pure text file with some rules */
  51                 TEXT,
  52                 /** TEXT but with associated .info file */
  53                 INFO_TEXT,
  54                 /** My Little Pony fanfictions */
  55                 FIMFICTION,
  56                 /** Fanfictions from a lot of different universes */
  57                 FANFICTION,
  58                 /** Website with lots of Mangas */
  59                 MANGAFOX,
  60                 /** Furry website with comics support */
  61                 E621,
  62                 /** CBZ files */
  63                 CBZ,
  64                 /** HTML files */
  65                 HTML;
  66
  67                 /**
  68                  * A description of this support type (more information than the
  69                  * {@link BasicSupport#getSourceName()}).
  70                  *
  71                  * @return the description
  72                  */
  73                 public String getDesc() {
  74                         String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
  75                                         this.name());
  76
  77                         if (desc == null) {
  78                                 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
  79                         }
  80
  81                         return desc;
  82                 }
  83
  84                 /**
  85                  * The name of this support type (a short version).
  86                  *
  87                  * @return the name
  88                  */
  89                 public String getSourceName() {
  90                         BasicSupport support = BasicSupport.getSupport(this);
  91                         if (support != null) {
  92                                 return support.getSourceName();
  93                         }
  94
  95                         return null;
  96                 }
  97
  98                 @Override
  99                 public String toString() {
 100                         return super.toString().toLowerCase();
 101                 }
 102
 103                 /**
 104                  * Call {@link SupportType#valueOf(String.toUpperCase())}.
 105                  *
 106                  * @param typeName
 107                  *            the possible type name
 108                  *
 109                  * @return NULL or the type
 110                  */
 111                 public static SupportType valueOfUC(String typeName) {
 112                         return SupportType.valueOf(typeName == null ? null : typeName
 113                                         .toUpperCase());
 114                 }
 115
 116                 /**
 117                  * Call {@link SupportType#valueOf(String.toUpperCase())} but return
 118                  * NULL for NULL instead of raising exception.
 119                  *
 120                  * @param typeName
 121                  *            the possible type name
 122                  *
 123                  * @return NULL or the type
 124                  */
 125                 public static SupportType valueOfNullOkUC(String typeName) {
 126                         if (typeName == null) {
 127                                 return null;
 128                         }
 129
 130                         return SupportType.valueOfUC(typeName);
 131                 }
 132
 133                 /**
 134                  * Call {@link SupportType#valueOf(String.toUpperCase())} but return
 135                  * NULL in case of error instead of raising an exception.
 136                  *
 137                  * @param typeName
 138                  *            the possible type name
 139                  *
 140                  * @return NULL or the type
 141                  */
 142                 public static SupportType valueOfAllOkUC(String typeName) {
 143                         try {
 144                                 return SupportType.valueOfUC(typeName);
 145                         } catch (Exception e) {
 146                                 return null;
 147                         }
 148                 }
 149         }
 150
        // the main resource, as opened by processMeta(...) via openInput(...)
        private InputStream in;
        // the support type, as given to setType(...)
        private SupportType type;
        private URL currentReferer; // with only one 'r', as in 'HTTP'...

        // quote chars (read from the translation bundle)
        private char openQuote = Instance.getTrans().getChar(
                        StringId.OPEN_SINGLE_QUOTE);
        private char closeQuote = Instance.getTrans().getChar(
                        StringId.CLOSE_SINGLE_QUOTE);
        private char openDoubleQuote = Instance.getTrans().getChar(
                        StringId.OPEN_DOUBLE_QUOTE);
        private char closeDoubleQuote = Instance.getTrans().getChar(
                        StringId.CLOSE_DOUBLE_QUOTE);
 164
        /**
         * The name of this support class.
         * <p>
         * This is the value returned by {@link SupportType#getSourceName()} for
         * the support linked to that type.
         *
         * @return the name
         */
        protected abstract String getSourceName();
 171
        /**
         * Check if the given resource is supported by this {@link BasicSupport}.
         *
         * @param url
         *            the resource to check for
         *
         * @return TRUE if this support can handle the resource
         */
        protected abstract boolean supports(URL url);
 181
        /**
         * Return TRUE if the support will return HTML encoded content values for
         * the chapters content.
         * <p>
         * When TRUE, {@link BasicSupport#makeParagraphs(URL, String)} replaces
         * the <tt>hr</tt> tags by a "* * *" line and splits each line on the
         * <tt>p</tt> and <tt>br</tt> tags before creating the paragraphs.
         *
         * @return TRUE for HTML
         */
        protected abstract boolean isHtml();
 189
        /**
         * Return the metadata of the story.
         * <p>
         * It is called by {@link BasicSupport#processMeta(URL, boolean, boolean)}
         * right after {@link BasicSupport#preprocess(URL, InputStream)}.
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input (the main resource)
         *
         * @return the metadata (the caller checks it against NULL before setting
         *         a default cover on it)
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract MetaData getMeta(URL source, InputStream in)
                        throws IOException;
 192
        /**
         * Return the story description.
         * <p>
         * It is only requested by
         * {@link BasicSupport#processMeta(URL, boolean, boolean)} when the
         * description is asked for; the result is then converted into a
         * {@link Chapter} (number 0) and stored as the resume of the metadata.
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input (the main resource)
         *
         * @return the description
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract String getDesc(URL source, InputStream in)
                        throws IOException;
 208
        /**
         * Return the list of chapters (name and resource).
         * <p>
         * For each entry, the key is the chapter name and the value is the
         * resource that {@link BasicSupport#process(URL, Progress)} opens to get
         * the chapter content.
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input (the main resource)
         *
         * @return the chapters (NULL is accepted by
         *         {@link BasicSupport#process(URL, Progress)}, which then adds no
         *         chapter)
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract List<Entry<String, URL>> getChapters(URL source,
                        InputStream in) throws IOException;
 224
        /**
         * Return the content of the chapter (possibly HTML encoded, if
         * {@link BasicSupport#isHtml()} is TRUE).
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input ({@link BasicSupport#process(URL, Progress)} passes
         *            the stream opened on the resource of this chapter, not the
         *            main resource)
         * @param number
         *            the chapter number
         *            ({@link BasicSupport#process(URL, Progress)} starts at 1)
         *
         * @return the content
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract String getChapterContent(URL source, InputStream in,
                        int number) throws IOException;
 243
 244         /**
 245          * Return the list of cookies (values included) that must be used to
 246          * correctly fetch the resources.
 247          * <p>
 248          * You are expected to call the super method implementation if you override
 249          * it.
 250          *
 251          * @return the cookies
 252          */
 253         public Map<String, String> getCookies() {
 254                 return new HashMap<String, String>();
 255         }
 256
 257         /**
 258          * Process the given story resource into a partially filled {@link Story}
 259          * object containing the name and metadata, except for the description.
 260          *
 261          * @param url
 262          *            the story resource
 263          *
 264          * @return the {@link Story}
 265          *
 266          * @throws IOException
 267          *             in case of I/O error
 268          */
 269         public Story processMeta(URL url) throws IOException {
 270                 return processMeta(url, true, false);
 271         }
 272
 273         /**
 274          * Process the given story resource into a partially filled {@link Story}
 275          * object containing the name and metadata.
 276          *
 277          * @param url
 278          *            the story resource
 279          *
 280          * @param close
 281          *            close "this" and "in" when done
 282          *
 283          * @return the {@link Story}
 284          *
 285          * @throws IOException
 286          *             in case of I/O error
 287          */
 288         protected Story processMeta(URL url, boolean close, boolean getDesc)
 289                         throws IOException {
 290                 in = openInput(url);
 291                 if (in == null) {
 292                         return null;
 293                 }
 294
 295                 try {
 296                         preprocess(url, getInput());
 297
 298                         Story story = new Story();
 299                         MetaData meta = getMeta(url, getInput());
 300                         story.setMeta(meta);
 301
 302                         if (meta != null && meta.getCover() == null) {
 303                                 meta.setCover(getDefaultCover(meta.getSubject()));
 304                         }
 305
 306                         if (getDesc) {
 307                                 String descChapterName = Instance.getTrans().getString(
 308                                                 StringId.DESCRIPTION);
 309                                 story.getMeta().setResume(
 310                                                 makeChapter(url, 0, descChapterName,
 311                                                                 getDesc(url, getInput())));
 312                         }
 313
 314                         return story;
 315                 } finally {
 316                         if (close) {
 317                                 try {
 318                                         close();
 319                                 } catch (IOException e) {
 320                                         Instance.syserr(e);
 321                                 }
 322
 323                                 if (in != null) {
 324                                         in.close();
 325                                 }
 326                         }
 327                 }
 328         }
 329
 330         /**
 331          * Process the given story resource into a fully filled {@link Story}
 332          * object.
 333          *
 334          * @param url
 335          *            the story resource
 336          * @param pg
 337          *            the optional progress reporter
 338          *
 339          * @return the {@link Story}
 340          *
 341          * @throws IOException
 342          *             in case of I/O error
 343          */
 344         public Story process(URL url, Progress pg) throws IOException {
 345                 if (pg == null) {
 346                         pg = new Progress();
 347                 } else {
 348                         pg.setMinMax(0, 100);
 349                 }
 350
 351                 setCurrentReferer(url);
 352
 353                 pg.setProgress(1);
 354                 try {
 355                         Story story = processMeta(url, false, true);
 356                         pg.setProgress(10);
 357                         if (story == null) {
 358                                 pg.setProgress(100);
 359                                 return null;
 360                         }
 361
 362                         story.setChapters(new ArrayList<Chapter>());
 363
 364                         List<Entry<String, URL>> chapters = getChapters(url, getInput());
 365                         pg.setProgress(20);
 366
 367                         int i = 1;
 368                         if (chapters != null) {
 369                                 Progress pgChaps = new Progress(0, chapters.size());
 370                                 pg.addProgress(pgChaps, 80);
 371
 372                                 for (Entry<String, URL> chap : chapters) {
 373                                         setCurrentReferer(chap.getValue());
 374                                         InputStream chapIn = Instance.getCache().open(
 375                                                         chap.getValue(), this, true);
 376                                         try {
 377                                                 story.getChapters().add(
 378                                                                 makeChapter(url, i, chap.getKey(),
 379                                                                                 getChapterContent(url, chapIn, i)));
 380                                         } finally {
 381                                                 chapIn.close();
 382                                         }
 383
 384                                         pgChaps.setProgress(i++);
 385                                 }
 386                         } else {
 387                                 pg.setProgress(100);
 388                         }
 389
 390                         return story;
 391
 392                 } finally {
 393                         try {
 394                                 close();
 395                         } catch (IOException e) {
 396                                 Instance.syserr(e);
 397                         }
 398
 399                         if (in != null) {
 400                                 in.close();
 401                         }
 402
 403                         currentReferer = null;
 404                 }
 405         }
 406
 407         /**
 408          * The support type.$
 409          *
 410          * @return the type
 411          */
 412         public SupportType getType() {
 413                 return type;
 414         }
 415
 416         /**
 417          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 418          * the current {@link URL} we work on.
 419          *
 420          * @return the referer
 421          */
 422         public URL getCurrentReferer() {
 423                 return currentReferer;
 424         }
 425
 426         /**
 427          * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
 428          * the current {@link URL} we work on.
 429          *
 430          * @param currentReferer
 431          *            the new referer
 432          */
 433         protected void setCurrentReferer(URL currentReferer) {
 434                 this.currentReferer = currentReferer;
 435         }
 436
 437         /**
 438          * The support type.
 439          *
 440          * @param type
 441          *            the new type
 442          *
 443          * @return this
 444          */
 445         protected BasicSupport setType(SupportType type) {
 446                 this.type = type;
 447                 return this;
 448         }
 449
 450         /**
 451          * Prepare the support if needed before processing.
 452          *
 453          * @param source
 454          *            the source of the story
 455          * @param in
 456          *            the input (the main resource)
 457          *
 458          * @throws IOException
 459          *             on I/O error
 460          */
 461         protected void preprocess(URL source, InputStream in) throws IOException {
 462         }
 463
 464         /**
 465          * Now that we have processed the {@link Story}, close the resources if any.
 466          *
 467          * @throws IOException
 468          *             on I/O error
 469          */
 470         protected void close() throws IOException {
 471         }
 472
 473         /**
 474          * Create a {@link Chapter} object from the given information, formatting
 475          * the content as it should be.
 476          *
 477          * @param number
 478          *            the chapter number
 479          * @param name
 480          *            the chapter name
 481          * @param content
 482          *            the chapter content
 483          *
 484          * @return the {@link Chapter}
 485          *
 486          * @throws IOException
 487          *             in case of I/O error
 488          */
 489         protected Chapter makeChapter(URL source, int number, String name,
 490                         String content) throws IOException {
 491                 // Chapter name: process it correctly, then remove the possible
 492                 // redundant "Chapter x: " in front of it
 493                 String chapterName = processPara(name).getContent().trim();
 494                 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
 495                                 .split(",")) {
 496                         String chapterWord = Instance.getConfig().getStringX(
 497                                         Config.CHAPTER, lang);
 498                         if (chapterName.startsWith(chapterWord)) {
 499                                 chapterName = chapterName.substring(chapterWord.length())
 500                                                 .trim();
 501                                 break;
 502                         }
 503                 }
 504
 505                 if (chapterName.startsWith(Integer.toString(number))) {
 506                         chapterName = chapterName.substring(
 507                                         Integer.toString(number).length()).trim();
 508                 }
 509
 510                 if (chapterName.startsWith(":")) {
 511                         chapterName = chapterName.substring(1).trim();
 512                 }
 513                 //
 514
 515                 Chapter chap = new Chapter(number, chapterName);
 516
 517                 if (content != null) {
 518                         chap.setParagraphs(makeParagraphs(source, content));
 519                 }
 520
 521                 return chap;
 522
 523         }
 524
 525         /**
 526          * Convert the given content into {@link Paragraph}s.
 527          *
 528          * @param source
 529          *            the source URL of the story
 530          * @param content
 531          *            the textual content
 532          *
 533          * @return the {@link Paragraph}s
 534          *
 535          * @throws IOException
 536          *             in case of I/O error
 537          */
 538         protected List<Paragraph> makeParagraphs(URL source, String content)
 539                         throws IOException {
 540                 if (isHtml()) {
 541                         // Special <HR> processing:
 542                         content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
 543                                         "\n* * *\n");
 544                 }
 545
 546                 List<Paragraph> paras = new ArrayList<Paragraph>();
 547                 InputStream in = new ByteArrayInputStream(content.getBytes("UTF-8"));
 548                 try {
 549                         BufferedReader buff = new BufferedReader(new InputStreamReader(in,
 550                                         "UTF-8"));
 551
 552                         for (String encodedLine = buff.readLine(); encodedLine != null; encodedLine = buff
 553                                         .readLine()) {
 554                                 String lines[];
 555                                 if (isHtml()) {
 556                                         lines = encodedLine.split("(<p>|</p>|<br>|<br/>|\\n)");
 557                                 } else {
 558                                         lines = new String[] { encodedLine };
 559                                 }
 560
 561                                 for (String aline : lines) {
 562                                         String line = aline.trim();
 563
 564                                         URL image = null;
 565                                         if (line.startsWith("[") && line.endsWith("]")) {
 566                                                 image = getImageUrl(this, source,
 567                                                                 line.substring(1, line.length() - 1).trim());
 568                                         }
 569
 570                                         if (image != null) {
 571                                                 paras.add(new Paragraph(image));
 572                                         } else {
 573                                                 paras.add(processPara(line));
 574                                         }
 575                                 }
 576                         }
 577                 } finally {
 578                         in.close();
 579                 }
 580
 581                 // Check quotes for "bad" format
 582                 List<Paragraph> newParas = new ArrayList<Paragraph>();
 583                 for (Paragraph para : paras) {
 584                         newParas.addAll(requotify(para));
 585                 }
 586                 paras = newParas;
 587
 588                 // Remove double blanks/brks
 589                 fixBlanksBreaks(paras);
 590
 591                 return paras;
 592         }
 593
 594         /**
 595          * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
 596          * those {@link Paragraph}s.
 597          * <p>
 598          * The resulting list will not contain a starting or trailing blank/break
 599          * nor 2 blanks or breaks following each other.
 600          *
 601          * @param paras
 602          *            the list of {@link Paragraph}s to fix
 603          */
 604         protected void fixBlanksBreaks(List<Paragraph> paras) {
 605                 boolean space = false;
 606                 boolean brk = true;
 607                 for (int i = 0; i < paras.size(); i++) {
 608                         Paragraph para = paras.get(i);
 609                         boolean thisSpace = para.getType() == ParagraphType.BLANK;
 610                         boolean thisBrk = para.getType() == ParagraphType.BREAK;
 611
 612                         if (i > 0 && space && thisBrk) {
 613                                 paras.remove(i - 1);
 614                                 i--;
 615                         } else if ((space || brk) && (thisSpace || thisBrk)) {
 616                                 paras.remove(i);
 617                                 i--;
 618                         }
 619
 620                         space = thisSpace;
 621                         brk = thisBrk;
 622                 }
 623
 624                 // Remove blank/brk at start
 625                 if (paras.size() > 0
 626                                 && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
 627                                                 0).getType() == ParagraphType.BREAK)) {
 628                         paras.remove(0);
 629                 }
 630
 631                 // Remove blank/brk at end
 632                 int last = paras.size() - 1;
 633                 if (paras.size() > 0
 634                                 && (paras.get(last).getType() == ParagraphType.BLANK || paras
 635                                                 .get(last).getType() == ParagraphType.BREAK)) {
 636                         paras.remove(last);
 637                 }
 638         }
 639
 640         /**
 641          * Get the default cover related to this subject (see <tt>.info</tt> files).
 642          *
 643          * @param subject
 644          *            the subject
 645          *
 646          * @return the cover if any, or NULL
 647          */
 648         static BufferedImage getDefaultCover(String subject) {
 649                 if (subject != null && !subject.isEmpty()
 650                                 && Instance.getCoverDir() != null) {
 651                         try {
 652                                 File fileCover = new File(Instance.getCoverDir(), subject);
 653                                 return getImage(null, fileCover.toURI().toURL(), subject);
 654                         } catch (MalformedURLException e) {
 655                         }
 656                 }
 657
 658                 return null;
 659         }
 660
 661         /**
 662          * Return the list of supported image extensions.
 663          *
 664          * @return the extensions
 665          */
 666         static String[] getImageExt(boolean emptyAllowed) {
 667                 if (emptyAllowed) {
 668                         return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 669                 } else {
 670                         return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 671                 }
 672         }
 673
 674         static BufferedImage getImage(BasicSupport support, URL source, String line) {
 675                 URL url = getImageUrl(support, source, line);
 676                 if (url != null) {
 677                         InputStream in = null;
 678                         try {
 679                                 in = Instance.getCache().open(url, getSupport(url), true);
 680                                 return IOUtils.toImage(in);
 681                         } catch (IOException e) {
 682                         } finally {
 683                                 if (in != null) {
 684                                         try {
 685                                                 in.close();
 686                                         } catch (IOException e) {
 687                                         }
 688                                 }
 689                         }
 690                 }
 691
 692                 return null;
 693         }
 694
 695         /**
 696          * Check if the given resource can be a local image or a remote image, then
 697          * refresh the cache with it if it is.
 698          *
 699          * @param source
 700          *            the story source
 701          * @param line
 702          *            the resource to check
 703          *
 704          * @return the image URL if found, or NULL
 705          *
 706          */
 707         static URL getImageUrl(BasicSupport support, URL source, String line) {
 708                 URL url = null;
 709
 710                 if (line != null) {
 711                         // try for files
 712                         String path = null;
 713                         if (source != null) {
 714                                 path = new File(source.getFile()).getParent();
 715                                 try {
 716                                         String basePath = new File(new File(path), line.trim())
 717                                                         .getAbsolutePath();
 718                                         for (String ext : getImageExt(true)) {
 719                                                 if (new File(basePath + ext).exists()) {
 720                                                         url = new File(basePath + ext).toURI().toURL();
 721                                                 }
 722                                         }
 723                                 } catch (Exception e) {
 724                                         // Nothing to do here
 725                                 }
 726                         }
 727
 728                         if (url == null) {
 729                                 // try for URLs
 730                                 try {
 731                                         for (String ext : getImageExt(true)) {
 732                                                 if (Instance.getCache().check(new URL(line + ext))) {
 733                                                         url = new URL(line + ext);
 734                                                         break;
 735                                                 }
 736                                         }
 737
 738                                         // try out of cache
 739                                         if (url == null) {
 740                                                 for (String ext : getImageExt(true)) {
 741                                                         try {
 742                                                                 url = new URL(line + ext);
 743                                                                 Instance.getCache().refresh(url, support, true);
 744                                                                 break;
 745                                                         } catch (IOException e) {
 746                                                                 // no image with this ext
 747                                                                 url = null;
 748                                                         }
 749                                                 }
 750                                         }
 751                                 } catch (MalformedURLException e) {
 752                                         // Not an url
 753                                 }
 754                         }
 755
 756                         // refresh the cached file
 757                         if (url != null) {
 758                                 try {
 759                                         Instance.getCache().refresh(url, support, true);
 760                                 } catch (IOException e) {
 761                                         // woops, broken image
 762                                         url = null;
 763                                 }
 764                         }
 765                 }
 766
 767                 return url;
 768         }
 769
 770         /**
 771          * Open the input file that will be used through the support.
 772          *
 773          * @param source
 774          *            the source {@link URL}
 775          *
 776          * @return the {@link InputStream}
 777          *
 778          * @throws IOException
 779          *             in case of I/O error
 780          */
 781         protected InputStream openInput(URL source) throws IOException {
 782                 return Instance.getCache().open(source, this, false);
 783         }
 784
 785         protected InputStream reset(InputStream in) {
 786                 try {
 787                         in.reset();
 788                 } catch (IOException e) {
 789                 }
 790                 return in;
 791         }
 792
 793         /**
 794          * Reset then return {@link BasicSupport#in}.
 795          *
 796          * @return {@link BasicSupport#in}
 797          */
 798         protected InputStream getInput() {
 799                 return reset(in);
 800         }
 801
 802         /**
 803          * Fix the author name if it is prefixed with some "by" {@link String}.
 804          *
 805          * @param author
 806          *            the author with a possible prefix
 807          *
 808          * @return the author without prefixes
 809          */
 810         protected String fixAuthor(String author) {
 811                 if (author != null) {
 812                         for (String suffix : new String[] { " ", ":" }) {
 813                                 for (String byString : Instance.getConfig()
 814                                                 .getString(Config.BYS).split(",")) {
 815                                         byString += suffix;
 816                                         if (author.toUpperCase().startsWith(byString.toUpperCase())) {
 817                                                 author = author.substring(byString.length()).trim();
 818                                         }
 819                                 }
 820                         }
 821
 822                         // Special case (without suffix):
 823                         if (author.startsWith("©")) {
 824                                 author = author.substring(1);
 825                         }
 826                 }
 827
 828                 return author;
 829         }
 830
 831         /**
 832          * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
 833          * and requotify them (i.e., separate them into QUOTE paragraphs and other
 834          * paragraphs (quotes or not)).
 835          *
 836          * @param para
 837          *            the paragraph to requotify (not necessaraly a quote)
 838          *
 839          * @return the correctly (or so we hope) quotified paragraphs
 840          */
 841         protected List<Paragraph> requotify(Paragraph para) {
 842                 List<Paragraph> newParas = new ArrayList<Paragraph>();
 843
 844                 if (para.getType() == ParagraphType.QUOTE
 845                                 && para.getContent().length() > 2) {
 846                         String line = para.getContent();
 847                         boolean singleQ = line.startsWith("" + openQuote);
 848                         boolean doubleQ = line.startsWith("" + openDoubleQuote);
 849
 850                         // Do not try when more than one quote at a time
 851                         // (some stories are not easily readable if we do)
 852                         if (singleQ
 853                                         && line.indexOf(closeQuote, 1) < line
 854                                                         .lastIndexOf(closeQuote)) {
 855                                 newParas.add(para);
 856                                 return newParas;
 857                         }
 858                         if (doubleQ
 859                                         && line.indexOf(closeDoubleQuote, 1) < line
 860                                                         .lastIndexOf(closeDoubleQuote)) {
 861                                 newParas.add(para);
 862                                 return newParas;
 863                         }
 864                         //
 865
 866                         if (!singleQ && !doubleQ) {
 867                                 line = openDoubleQuote + line + closeDoubleQuote;
 868                                 newParas.add(new Paragraph(ParagraphType.QUOTE, line));
 869                         } else {
 870                                 char open = singleQ ? openQuote : openDoubleQuote;
 871                                 char close = singleQ ? closeQuote : closeDoubleQuote;
 872
 873                                 int posDot = -1;
 874                                 boolean inQuote = false;
 875                                 int i = 0;
 876                                 for (char car : line.toCharArray()) {
 877                                         if (car == open) {
 878                                                 inQuote = true;
 879                                         } else if (car == close) {
 880                                                 inQuote = false;
 881                                         } else if (car == '.' && !inQuote) {
 882                                                 posDot = i;
 883                                                 break;
 884                                         }
 885                                         i++;
 886                                 }
 887
 888                                 if (posDot >= 0) {
 889                                         String rest = line.substring(posDot + 1).trim();
 890                                         line = line.substring(0, posDot + 1).trim();
 891                                         newParas.add(new Paragraph(ParagraphType.QUOTE, line));
 892                                         if (!rest.isEmpty()) {
 893                                                 newParas.addAll(requotify(processPara(rest)));
 894                                         }
 895                                 } else {
 896                                         newParas.add(para);
 897                                 }
 898                         }
 899                 } else {
 900                         newParas.add(para);
 901                 }
 902
 903                 return newParas;
 904         }
 905
 906         /**
 907          * Process a {@link Paragraph} from a raw line of text.
 908          * <p>
 909          * Will also fix quotes and HTML encoding if needed.
 910          *
 911          * @param line
 912          *            the raw line
 913          *
 914          * @return the processed {@link Paragraph}
 915          */
 916         private Paragraph processPara(String line) {
 917                 line = ifUnhtml(line).trim();
 918
 919                 boolean space = true;
 920                 boolean brk = true;
 921                 boolean quote = false;
 922                 boolean tentativeCloseQuote = false;
 923                 char prev = '\0';
 924                 int dashCount = 0;
 925
 926                 StringBuilder builder = new StringBuilder();
 927                 for (char car : line.toCharArray()) {
 928                         if (car != '-') {
 929                                 if (dashCount > 0) {
 930                                         // dash, ndash and mdash: - – —
 931                                         // currently: always use mdash
 932                                         builder.append(dashCount == 1 ? '-' : '—');
 933                                 }
 934                                 dashCount = 0;
 935                         }
 936
 937                         if (tentativeCloseQuote) {
 938                                 tentativeCloseQuote = false;
 939                                 if ((car >= 'a' && car <= 'z') || (car >= 'A' && car <= 'Z')
 940                                                 || (car >= '0' && car <= '9')) {
 941                                         builder.append("'");
 942                                 } else {
 943                                         builder.append(closeQuote);
 944                                 }
 945                         }
 946
 947                         switch (car) {
 948                         case ' ': // note: unbreakable space
 949                         case ' ':
 950                         case '\t':
 951                         case '\n': // just in case
 952                         case '\r': // just in case
 953                                 builder.append(' ');
 954                                 break;
 955
 956                         case '\'':
 957                                 if (space || (brk && quote)) {
 958                                         quote = true;
 959                                         builder.append(openQuote);
 960                                 } else if (prev == ' ') {
 961                                         builder.append(openQuote);
 962                                 } else {
 963                                         // it is a quote ("I'm off") or a 'quote' ("This
 964                                         // 'good' restaurant"...)
 965                                         tentativeCloseQuote = true;
 966                                 }
 967                                 break;
 968
 969                         case '"':
 970                                 if (space || (brk && quote)) {
 971                                         quote = true;
 972                                         builder.append(openDoubleQuote);
 973                                 } else if (prev == ' ') {
 974                                         builder.append(openDoubleQuote);
 975                                 } else {
 976                                         builder.append(closeDoubleQuote);
 977                                 }
 978                                 break;
 979
 980                         case '-':
 981                                 if (space) {
 982                                         quote = true;
 983                                 } else {
 984                                         dashCount++;
 985                                 }
 986                                 space = false;
 987                                 break;
 988
 989                         case '*':
 990                         case '~':
 991                         case '/':
 992                         case '\\':
 993                         case '<':
 994                         case '>':
 995                         case '=':
 996                         case '+':
 997                         case '_':
 998                         case '–':
 999                         case '—':
1000                                 space = false;
1001                                 builder.append(car);
1002                                 break;
1003
1004                         case '‘':
1005                         case '`':
1006                         case '‹':
1007                         case '﹁':
1008                         case '〈':
1009                         case '「':
1010                                 if (space || (brk && quote)) {
1011                                         quote = true;
1012                                         builder.append(openQuote);
1013                                 } else {
1014                                         builder.append(openQuote);
1015                                 }
1016                                 space = false;
1017                                 brk = false;
1018                                 break;
1019
1020                         case '’':
1021                         case '›':
1022                         case '﹂':
1023                         case '〉':
1024                         case '」':
1025                                 space = false;
1026                                 brk = false;
1027                                 builder.append(closeQuote);
1028                                 break;
1029
1030                         case '«':
1031                         case '“':
1032                         case '﹃':
1033                         case '《':
1034                         case '『':
1035                                 if (space || (brk && quote)) {
1036                                         quote = true;
1037                                         builder.append(openDoubleQuote);
1038                                 } else {
1039                                         builder.append(openDoubleQuote);
1040                                 }
1041                                 space = false;
1042                                 brk = false;
1043                                 break;
1044
1045                         case '»':
1046                         case '”':
1047                         case '﹄':
1048                         case '》':
1049                         case '』':
1050                                 space = false;
1051                                 brk = false;
1052                                 builder.append(closeDoubleQuote);
1053                                 break;
1054
1055                         default:
1056                                 space = false;
1057                                 brk = false;
1058                                 builder.append(car);
1059                                 break;
1060                         }
1061
1062                         prev = car;
1063                 }
1064
1065                 if (tentativeCloseQuote) {
1066                         tentativeCloseQuote = false;
1067                         builder.append(closeQuote);
1068                 }
1069
1070                 line = builder.toString().trim();
1071
1072                 ParagraphType type = ParagraphType.NORMAL;
1073                 if (space) {
1074                         type = ParagraphType.BLANK;
1075                 } else if (brk) {
1076                         type = ParagraphType.BREAK;
1077                 } else if (quote) {
1078                         type = ParagraphType.QUOTE;
1079                 }
1080
1081                 return new Paragraph(type, line);
1082         }
1083
1084         /**
1085          * Remove the HTML from the inpit <b>if</b> {@link BasicSupport#isHtml()} is
1086          * true.
1087          *
1088          * @param input
1089          *            the input
1090          *
1091          * @return the no html version if needed
1092          */
1093         private String ifUnhtml(String input) {
1094                 if (isHtml() && input != null) {
1095                         return StringUtils.unhtml(input);
1096                 }
1097
1098                 return input;
1099         }
1100
1101         /**
1102          * Return a {@link BasicSupport} implementation supporting the given
1103          * resource if possible.
1104          *
1105          * @param url
1106          *            the story resource
1107          *
1108          * @return an implementation that supports it, or NULL
1109          */
1110         public static BasicSupport getSupport(URL url) {
1111                 if (url == null) {
1112                         return null;
1113                 }
1114
1115                 // TEXT and INFO_TEXT always support files (not URLs though)
1116                 for (SupportType type : SupportType.values()) {
1117                         if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1118                                 BasicSupport support = getSupport(type);
1119                                 if (support != null && support.supports(url)) {
1120                                         return support;
1121                                 }
1122                         }
1123                 }
1124
1125                 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
1126                                 SupportType.TEXT }) {
1127                         BasicSupport support = getSupport(type);
1128                         if (support != null && support.supports(url)) {
1129                                 return support;
1130                         }
1131                 }
1132
1133                 return null;
1134         }
1135
1136         /**
1137          * Return a {@link BasicSupport} implementation supporting the given type.
1138          *
1139          * @param type
1140          *            the type
1141          *
1142          * @return an implementation that supports it, or NULL
1143          */
1144         public static BasicSupport getSupport(SupportType type) {
1145                 switch (type) {
1146                 case EPUB:
1147                         return new Epub().setType(type);
1148                 case INFO_TEXT:
1149                         return new InfoText().setType(type);
1150                 case FIMFICTION:
1151                         return new Fimfiction().setType(type);
1152                 case FANFICTION:
1153                         return new Fanfiction().setType(type);
1154                 case TEXT:
1155                         return new Text().setType(type);
1156                 case MANGAFOX:
1157                         return new MangaFox().setType(type);
1158                 case E621:
1159                         return new E621().setType(type);
1160                 case CBZ:
1161                         return new Cbz().setType(type);
1162                 case HTML:
1163                         return new Html().setType(type);
1164                 }
1165
1166                 return null;
1167         }
1168
1169         /**
1170          * Return the first line from the given input which correspond to the given
1171          * selectors.
1172          *
1173          * @param in
1174          *            the input
1175          * @param needle
1176          *            a string that must be found inside the target line (also
1177          *            supports "^" at start to say "only if it starts with" the
1178          *            needle)
1179          * @param relativeLine
1180          *            the line to return based upon the target line position (-1 =
1181          *            the line before, 0 = the target line...)
1182          *
1183          * @return the line
1184          */
1185         static String getLine(InputStream in, String needle, int relativeLine) {
1186                 return getLine(in, needle, relativeLine, true);
1187         }
1188
1189         /**
1190          * Return a line from the given input which correspond to the given
1191          * selectors.
1192          *
1193          * @param in
1194          *            the input
1195          * @param needle
1196          *            a string that must be found inside the target line (also
1197          *            supports "^" at start to say "only if it starts with" the
1198          *            needle)
1199          * @param relativeLine
1200          *            the line to return based upon the target line position (-1 =
1201          *            the line before, 0 = the target line...)
1202          * @param first
1203          *            takes the first result (as opposed to the last one, which will
1204          *            also always spend the input)
1205          *
1206          * @return the line
1207          */
1208         static String getLine(InputStream in, String needle, int relativeLine,
1209                         boolean first) {
1210                 String rep = null;
1211
1212                 try {
1213                         in.reset();
1214                 } catch (IOException e) {
1215                         Instance.syserr(e);
1216                 }
1217
1218                 List<String> lines = new ArrayList<String>();
1219                 @SuppressWarnings("resource")
1220                 Scanner scan = new Scanner(in, "UTF-8");
1221                 int index = -1;
1222                 scan.useDelimiter("\\n");
1223                 while (scan.hasNext()) {
1224                         lines.add(scan.next());
1225
1226                         if (index == -1) {
1227                                 if (needle.startsWith("^")) {
1228                                         if (lines.get(lines.size() - 1).startsWith(
1229                                                         needle.substring(1))) {
1230                                                 index = lines.size() - 1;
1231                                         }
1232
1233                                 } else {
1234                                         if (lines.get(lines.size() - 1).contains(needle)) {
1235                                                 index = lines.size() - 1;
1236                                         }
1237                                 }
1238                         }
1239
1240                         if (index >= 0 && index + relativeLine < lines.size()) {
1241                                 rep = lines.get(index + relativeLine);
1242                                 if (first) {
1243                                         break;
1244                                 }
1245                         }
1246                 }
1247
1248                 return rep;
1249         }
1250 }