src/be/nikiroo/fanfix/supported/BasicSupport_Deprecated.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.io.BufferedReader;
   4 import java.io.ByteArrayInputStream;
   5 import java.io.File;
   6 import java.io.IOException;
   7 import java.io.InputStream;
   8 import java.io.InputStreamReader;
   9 import java.net.MalformedURLException;
  10 import java.net.URL;
  11 import java.util.ArrayList;
  12 import java.util.Date;
  13 import java.util.List;
  14 import java.util.Map.Entry;
  15 import java.util.Scanner;
  16
  17 import be.nikiroo.fanfix.Instance;
  18 import be.nikiroo.fanfix.bundles.Config;
  19 import be.nikiroo.fanfix.bundles.StringId;
  20 import be.nikiroo.fanfix.data.Chapter;
  21 import be.nikiroo.fanfix.data.MetaData;
  22 import be.nikiroo.fanfix.data.Paragraph;
  23 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
  24 import be.nikiroo.fanfix.data.Story;
  25 import be.nikiroo.utils.Image;
  26 import be.nikiroo.utils.Progress;
  27 import be.nikiroo.utils.StringUtils;
  28
  29 /**
  30  * DEPRECATED: use the new Jsoup 'Node' system.
  31  * <p>
  32  * This class is the base class used by the other support classes. It can be
  33  * used outside of this package, and has static methods that you can use to get
  34  * access to the correct support class.
  35  * <p>
  36  * It will be used with 'resources' (usually web pages or files).
  37  *
  38  * @author niki
  39  */
  40 @Deprecated
  41 public abstract class BasicSupport_Deprecated extends BasicSupport {
  42         private InputStream in;
  43         private URL currentReferer; // with only one 'r', as in 'HTTP'...
  44
  45         // quote chars
  46         private char openQuote = Instance.getTrans().getCharacter(
  47                         StringId.OPEN_SINGLE_QUOTE);
  48         private char closeQuote = Instance.getTrans().getCharacter(
  49                         StringId.CLOSE_SINGLE_QUOTE);
  50         private char openDoubleQuote = Instance.getTrans().getCharacter(
  51                         StringId.OPEN_DOUBLE_QUOTE);
  52         private char closeDoubleQuote = Instance.getTrans().getCharacter(
  53                         StringId.CLOSE_DOUBLE_QUOTE);
  54
  55         // New methods not used in Deprecated mode
  56         @Override
  57         protected String getDesc() throws IOException {
  58                 throw new RuntimeException("should not be used by legacy code");
  59         }
  60
  61         @Override
  62         protected MetaData getMeta() throws IOException {
  63                 throw new RuntimeException("should not be used by legacy code");
  64         }
  65
  66         @Override
  67         protected List<Entry<String, URL>> getChapters(Progress pg)
  68                         throws IOException {
  69                 throw new RuntimeException("should not be used by legacy code");
  70         }
  71
  72         @Override
  73         protected String getChapterContent(URL chapUrl, int number, Progress pg)
  74                         throws IOException {
  75                 throw new RuntimeException("should not be used by legacy code");
  76         }
  77
  78         @Override
  79         public Story process(Progress pg) throws IOException {
  80                 return process(getSource(), pg);
  81         }
  82
  83         //
  84
  85         /**
  86          * Return the {@link MetaData} of this story.
  87          *
  88          * @param source
  89          *            the source of the story
  90          * @param in
  91          *            the input (the main resource)
  92          *
  93          * @return the associated {@link MetaData}, never NULL
  94          *
  95          * @throws IOException
  96          *             in case of I/O error
  97          */
  98         protected abstract MetaData getMeta(URL source, InputStream in)
  99                         throws IOException;
 100
 101         /**
 102          * Return the story description.
 103          *
 104          * @param source
 105          *            the source of the story
 106          * @param in
 107          *            the input (the main resource)
 108          *
 109          * @return the description
 110          *
 111          * @throws IOException
 112          *             in case of I/O error
 113          */
 114         protected abstract String getDesc(URL source, InputStream in)
 115                         throws IOException;
 116
 117         /**
 118          * Return the list of chapters (name and resource).
 119          *
 120          * @param source
 121          *            the source of the story
 122          * @param in
 123          *            the input (the main resource)
 124          * @param pg
 125          *            the optional progress reporter
 126          *
 127          * @return the chapters
 128          *
 129          * @throws IOException
 130          *             in case of I/O error
 131          */
 132         protected abstract List<Entry<String, URL>> getChapters(URL source,
 133                         InputStream in, Progress pg) throws IOException;
 134
 135         /**
 136          * Return the content of the chapter (possibly HTML encoded, if
 137          * {@link BasicSupport_Deprecated#isHtml()} is TRUE).
 138          *
 139          * @param source
 140          *            the source of the story
 141          * @param in
 142          *            the input (the main resource)
 143          * @param number
 144          *            the chapter number
 145          * @param pg
 146          *            the optional progress reporter
 147          *
 148          * @return the content
 149          *
 150          * @throws IOException
 151          *             in case of I/O error
 152          */
 153         protected abstract String getChapterContent(URL source, InputStream in,
 154                         int number, Progress pg) throws IOException;
 155
 156         /**
 157          * Process the given story resource into a partially filled {@link Story}
 158          * object containing the name and metadata, except for the description.
 159          *
 160          * @param url
 161          *            the story resource
 162          *
 163          * @return the {@link Story}
 164          *
 165          * @throws IOException
 166          *             in case of I/O error
 167          */
 168         public Story processMeta(URL url) throws IOException {
 169                 return processMeta(url, true, false, null);
 170         }
 171
 172         /**
 173          * Process the given story resource into a partially filled {@link Story}
 174          * object containing the name and metadata.
 175          *
 176          * @param url
 177          *            the story resource
 178          * @param close
 179          *            close "this" and "in" when done
 180          * @param getDesc
 181          *            retrieve the description of the story, or not
 182          * @param pg
 183          *            the optional progress reporter
 184          *
 185          * @return the {@link Story}, never NULL
 186          *
 187          * @throws IOException
 188          *             in case of I/O error
 189          */
 190         protected Story processMeta(URL url, boolean close, boolean getDesc,
 191                         Progress pg) throws IOException {
 192                 if (pg == null) {
 193                         pg = new Progress();
 194                 } else {
 195                         pg.setMinMax(0, 100);
 196                 }
 197
 198                 login();
 199                 pg.setProgress(10);
 200
 201                 url = getCanonicalUrl(url);
 202
 203                 setCurrentReferer(url);
 204
 205                 in = openInput(url); // NULL allowed here
 206                 try {
 207                         preprocess(url, getInput());
 208                         pg.setProgress(30);
 209
 210                         Story story = new Story();
 211                         MetaData meta = getMeta(url, getInput());
 212                         if (meta.getCreationDate() == null
 213                                         || meta.getCreationDate().isEmpty()) {
 214                                 meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
 215                         }
 216                         story.setMeta(meta);
 217
 218                         pg.setProgress(50);
 219
 220                         if (meta.getCover() == null) {
 221                                 meta.setCover(getDefaultCover(meta.getSubject()));
 222                         }
 223
 224                         pg.setProgress(60);
 225
 226                         if (getDesc) {
 227                                 String descChapterName = Instance.getTrans().getString(
 228                                                 StringId.DESCRIPTION);
 229                                 story.getMeta().setResume(
 230                                                 makeChapter(url, 0, descChapterName,
 231                                                                 getDesc(url, getInput()), null));
 232                         }
 233
 234                         pg.setProgress(100);
 235                         return story;
 236                 } finally {
 237                         if (close) {
 238                                 close();
 239
 240                                 if (in != null) {
 241                                         in.close();
 242                                 }
 243                         }
 244                 }
 245         }
 246
 247         /**
 248          * Process the given story resource into a fully filled {@link Story}
 249          * object.
 250          *
 251          * @param url
 252          *            the story resource
 253          * @param pg
 254          *            the optional progress reporter
 255          *
 256          * @return the {@link Story}, never NULL
 257          *
 258          * @throws IOException
 259          *             in case of I/O error
 260          */
 261         protected Story process(URL url, Progress pg) throws IOException {
 262                 if (pg == null) {
 263                         pg = new Progress();
 264                 } else {
 265                         pg.setMinMax(0, 100);
 266                 }
 267
 268                 url = getCanonicalUrl(url);
 269                 pg.setProgress(1);
 270                 try {
 271                         Progress pgMeta = new Progress();
 272                         pg.addProgress(pgMeta, 10);
 273                         Story story = processMeta(url, false, true, pgMeta);
 274                         if (!pgMeta.isDone()) {
 275                                 pgMeta.setProgress(pgMeta.getMax()); // 10%
 276                         }
 277
 278                         pg.setName("Retrieving " + story.getMeta().getTitle());
 279
 280                         setCurrentReferer(url);
 281
 282                         Progress pgGetChapters = new Progress();
 283                         pg.addProgress(pgGetChapters, 10);
 284                         story.setChapters(new ArrayList<Chapter>());
 285                         List<Entry<String, URL>> chapters = getChapters(url, getInput(),
 286                                         pgGetChapters);
 287                         if (!pgGetChapters.isDone()) {
 288                                 pgGetChapters.setProgress(pgGetChapters.getMax()); // 20%
 289                         }
 290
 291                         if (chapters != null) {
 292                                 Progress pgChaps = new Progress("Extracting chapters", 0,
 293                                                 chapters.size() * 300);
 294                                 pg.addProgress(pgChaps, 80);
 295
 296                                 long words = 0;
 297                                 int i = 1;
 298                                 for (Entry<String, URL> chap : chapters) {
 299                                         pgChaps.setName("Extracting chapter " + i);
 300                                         InputStream chapIn = null;
 301                                         if (chap.getValue() != null) {
 302                                                 setCurrentReferer(chap.getValue());
 303                                                 chapIn = Instance.getCache().open(chap.getValue(),
 304                                                                 this, false);
 305                                         }
 306                                         pgChaps.setProgress(i * 100);
 307                                         try {
 308                                                 Progress pgGetChapterContent = new Progress();
 309                                                 Progress pgMakeChapter = new Progress();
 310                                                 pgChaps.addProgress(pgGetChapterContent, 100);
 311                                                 pgChaps.addProgress(pgMakeChapter, 100);
 312
 313                                                 String content = getChapterContent(url, chapIn, i,
 314                                                                 pgGetChapterContent);
 315                                                 if (!pgGetChapterContent.isDone()) {
 316                                                         pgGetChapterContent.setProgress(pgGetChapterContent
 317                                                                         .getMax());
 318                                                 }
 319
 320                                                 Chapter cc = makeChapter(url, i, chap.getKey(),
 321                                                                 content, pgMakeChapter);
 322                                                 if (!pgMakeChapter.isDone()) {
 323                                                         pgMakeChapter.setProgress(pgMakeChapter.getMax());
 324                                                 }
 325
 326                                                 words += cc.getWords();
 327                                                 story.getChapters().add(cc);
 328                                                 story.getMeta().setWords(words);
 329                                         } finally {
 330                                                 if (chapIn != null) {
 331                                                         chapIn.close();
 332                                                 }
 333                                         }
 334
 335                                         i++;
 336                                 }
 337
 338                                 pgChaps.setName("Extracting chapters");
 339                         } else {
 340                                 pg.setProgress(80);
 341                         }
 342
 343                         return story;
 344
 345                 } finally {
 346                         close();
 347
 348                         if (in != null) {
 349                                 in.close();
 350                         }
 351                 }
 352         }
 353
 354         /**
 355          * Prepare the support if needed before processing.
 356          *
 357          * @param source
 358          *            the source of the story
 359          * @param in
 360          *            the input (the main resource)
 361          *
 362          * @throws IOException
 363          *             on I/O error
 364          */
 365         @SuppressWarnings("unused")
 366         protected void preprocess(URL source, InputStream in) throws IOException {
 367         }
 368
 369         /**
 370          * Create a {@link Chapter} object from the given information, formatting
 371          * the content as it should be.
 372          *
 373          * @param source
 374          *            the source of the story
 375          * @param number
 376          *            the chapter number
 377          * @param name
 378          *            the chapter name
 379          * @param content
 380          *            the chapter content
 381          * @param pg
 382          *            the optional progress reporter
 383          *
 384          * @return the {@link Chapter}
 385          *
 386          * @throws IOException
 387          *             in case of I/O error
 388          */
 389         protected Chapter makeChapter(URL source, int number, String name,
 390                         String content, Progress pg) throws IOException {
 391                 // Chapter name: process it correctly, then remove the possible
 392                 // redundant "Chapter x: " in front of it, or "-" (as in
 393                 // "Chapter 5: - Fun!" after the ": " was automatically added)
 394                 String chapterName = processPara(name).getContent().trim();
 395                 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
 396                                 .split(",")) {
 397                         String chapterWord = Instance.getConfig().getStringX(
 398                                         Config.CHAPTER, lang);
 399                         if (chapterName.startsWith(chapterWord)) {
 400                                 chapterName = chapterName.substring(chapterWord.length())
 401                                                 .trim();
 402                                 break;
 403                         }
 404                 }
 405
 406                 if (chapterName.startsWith(Integer.toString(number))) {
 407                         chapterName = chapterName.substring(
 408                                         Integer.toString(number).length()).trim();
 409                 }
 410
 411                 while (chapterName.startsWith(":") || chapterName.startsWith("-")) {
 412                         chapterName = chapterName.substring(1).trim();
 413                 }
 414                 //
 415
 416                 Chapter chap = new Chapter(number, chapterName);
 417
 418                 if (content != null) {
 419                         List<Paragraph> paras = makeParagraphs(source, content, pg);
 420                         long words = 0;
 421                         for (Paragraph para : paras) {
 422                                 words += para.getWords();
 423                         }
 424                         chap.setParagraphs(paras);
 425                         chap.setWords(words);
 426                 }
 427
 428                 return chap;
 429
 430         }
 431
 432         /**
 433          * Convert the given content into {@link Paragraph}s.
 434          *
 435          * @param source
 436          *            the source URL of the story
 437          * @param content
 438          *            the textual content
 439          * @param pg
 440          *            the optional progress reporter
 441          *
 442          * @return the {@link Paragraph}s
 443          *
 444          * @throws IOException
 445          *             in case of I/O error
 446          */
 447         protected List<Paragraph> makeParagraphs(URL source, String content,
 448                         Progress pg) throws IOException {
 449                 if (pg == null) {
 450                         pg = new Progress();
 451                 }
 452
 453                 if (isHtml()) {
 454                         // Special <HR> processing:
 455                         content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
 456                                         "<br/>* * *<br/>");
 457                 }
 458
 459                 List<Paragraph> paras = new ArrayList<Paragraph>();
 460
 461                 if (content != null && !content.trim().isEmpty()) {
 462                         if (isHtml()) {
 463                                 String[] tab = content.split("(<p>|</p>|<br>|<br/>)");
 464                                 pg.setMinMax(0, tab.length);
 465                                 int i = 1;
 466                                 for (String line : tab) {
 467                                         if (line.startsWith("[") && line.endsWith("]")) {
 468                                                 pg.setName("Extracting image " + i);
 469                                         }
 470                                         paras.add(makeParagraph(source, line.trim()));
 471                                         pg.setProgress(i++);
 472                                 }
 473                                 pg.setName(null);
 474                         } else {
 475                                 List<String> lines = new ArrayList<String>();
 476                                 BufferedReader buff = null;
 477                                 try {
 478                                         buff = new BufferedReader(
 479                                                         new InputStreamReader(new ByteArrayInputStream(
 480                                                                         content.getBytes("UTF-8")), "UTF-8"));
 481                                         for (String line = buff.readLine(); line != null; line = buff
 482                                                         .readLine()) {
 483                                                 lines.add(line.trim());
 484                                         }
 485                                 } finally {
 486                                         if (buff != null) {
 487                                                 buff.close();
 488                                         }
 489                                 }
 490
 491                                 pg.setMinMax(0, lines.size());
 492                                 int i = 0;
 493                                 for (String line : lines) {
 494                                         if (line.startsWith("[") && line.endsWith("]")) {
 495                                                 pg.setName("Extracting image " + i);
 496                                         }
 497                                         paras.add(makeParagraph(source, line));
 498                                         pg.setProgress(i++);
 499                                 }
 500                                 pg.setName(null);
 501                         }
 502
 503                         // Check quotes for "bad" format
 504                         List<Paragraph> newParas = new ArrayList<Paragraph>();
 505                         for (Paragraph para : paras) {
 506                                 newParas.addAll(requotify(para));
 507                         }
 508                         paras = newParas;
 509
 510                         // Remove double blanks/brks
 511                         fixBlanksBreaks(paras);
 512                 }
 513
 514                 return paras;
 515         }
 516
 517         /**
 518          * Convert the given line into a single {@link Paragraph}.
 519          *
 520          * @param source
 521          *            the source URL of the story
 522          * @param line
 523          *            the textual content of the paragraph
 524          *
 525          * @return the {@link Paragraph}
 526          */
 527         private Paragraph makeParagraph(URL source, String line) {
 528                 Image image = null;
 529                 if (line.startsWith("[") && line.endsWith("]")) {
 530                         image = getImage(this, source, line.substring(1, line.length() - 1)
 531                                         .trim());
 532                 }
 533
 534                 if (image != null) {
 535                         return new Paragraph(image);
 536                 }
 537
 538                 return processPara(line);
 539         }
 540
 541         /**
 542          * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
 543          * those {@link Paragraph}s.
 544          * <p>
 545          * The resulting list will not contain a starting or trailing blank/break
 546          * nor 2 blanks or breaks following each other.
 547          *
 548          * @param paras
 549          *            the list of {@link Paragraph}s to fix
 550          */
 551         protected void fixBlanksBreaks(List<Paragraph> paras) {
 552                 boolean space = false;
 553                 boolean brk = true;
 554                 for (int i = 0; i < paras.size(); i++) {
 555                         Paragraph para = paras.get(i);
 556                         boolean thisSpace = para.getType() == ParagraphType.BLANK;
 557                         boolean thisBrk = para.getType() == ParagraphType.BREAK;
 558
 559                         if (i > 0 && space && thisBrk) {
 560                                 paras.remove(i - 1);
 561                                 i--;
 562                         } else if ((space || brk) && (thisSpace || thisBrk)) {
 563                                 paras.remove(i);
 564                                 i--;
 565                         }
 566
 567                         space = thisSpace;
 568                         brk = thisBrk;
 569                 }
 570
 571                 // Remove blank/brk at start
 572                 if (paras.size() > 0
 573                                 && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
 574                                                 0).getType() == ParagraphType.BREAK)) {
 575                         paras.remove(0);
 576                 }
 577
 578                 // Remove blank/brk at end
 579                 int last = paras.size() - 1;
 580                 if (paras.size() > 0
 581                                 && (paras.get(last).getType() == ParagraphType.BLANK || paras
 582                                                 .get(last).getType() == ParagraphType.BREAK)) {
 583                         paras.remove(last);
 584                 }
 585         }
 586
 587         /**
 588          * Get the default cover related to this subject (see <tt>.info</tt> files).
 589          *
 590          * @param subject
 591          *            the subject
 592          *
 593          * @return the cover if any, or NULL
 594          */
 595         static Image getDefaultCover(String subject) {
 596                 if (subject != null && !subject.isEmpty()
 597                                 && Instance.getCoverDir() != null) {
 598                         try {
 599                                 File fileCover = new File(Instance.getCoverDir(), subject);
 600                                 return getImage(null, fileCover.toURI().toURL(), subject);
 601                         } catch (MalformedURLException e) {
 602                         }
 603                 }
 604
 605                 return null;
 606         }
 607
 608         /**
 609          * Return the list of supported image extensions.
 610          *
 611          * @param emptyAllowed
 612          *            TRUE to allow an empty extension on first place, which can be
 613          *            used when you may already have an extension in your input but
 614          *            are not sure about it
 615          *
 616          * @return the extensions
 617          */
 618         static String[] getImageExt(boolean emptyAllowed) {
 619                 if (emptyAllowed) {
 620                         return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 621                 }
 622
 623                 return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 624         }
 625
 626         /**
 627          * Check if the given resource can be a local image or a remote image, then
 628          * refresh the cache with it if it is.
 629          *
 630          * @param source
 631          *            the story source
 632          * @param line
 633          *            the resource to check
 634          *
 635          * @return the image if found, or NULL
 636          *
 637          */
 638         static Image getImage(BasicSupport_Deprecated support, URL source,
 639                         String line) {
 640                 URL url = getImageUrl(support, source, line);
 641                 if (url != null) {
 642                         if ("file".equals(url.getProtocol())) {
 643                                 if (new File(url.getPath()).isDirectory()) {
 644                                         return null;
 645                                 }
 646                         }
 647                         InputStream in = null;
 648                         try {
 649                                 in = Instance.getCache().open(url, getSupport(url), true);
 650                                 return new Image(in);
 651                         } catch (IOException e) {
 652                         } finally {
 653                                 if (in != null) {
 654                                         try {
 655                                                 in.close();
 656                                         } catch (IOException e) {
 657                                         }
 658                                 }
 659                         }
 660                 }
 661
 662                 return null;
 663         }
 664
 665         /**
 666          * Check if the given resource can be a local image or a remote image, then
 667          * refresh the cache with it if it is.
 668          *
 669          * @param source
 670          *            the story source
 671          * @param line
 672          *            the resource to check
 673          *
 674          * @return the image URL if found, or NULL
 675          *
 676          */
 677         static URL getImageUrl(BasicSupport_Deprecated support, URL source,
 678                         String line) {
 679                 URL url = null;
 680
 681                 if (line != null) {
 682                         // try for files
 683                         if (source != null) {
 684                                 try {
 685
 686                                         String relPath = null;
 687                                         String absPath = null;
 688                                         try {
 689                                                 String path = new File(source.getFile()).getParent();
 690                                                 relPath = new File(new File(path), line.trim())
 691                                                                 .getAbsolutePath();
 692                                         } catch (Exception e) {
 693                                                 // Cannot be converted to path (one possibility to take
 694                                                 // into account: absolute path on Windows)
 695                                         }
 696                                         try {
 697                                                 absPath = new File(line.trim()).getAbsolutePath();
 698                                         } catch (Exception e) {
 699                                                 // Cannot be converted to path (at all)
 700                                         }
 701
 702                                         for (String ext : getImageExt(true)) {
 703                                                 File absFile = new File(absPath + ext);
 704                                                 File relFile = new File(relPath + ext);
 705                                                 if (absPath != null && absFile.exists()
 706                                                                 && absFile.isFile()) {
 707                                                         url = absFile.toURI().toURL();
 708                                                 } else if (relPath != null && relFile.exists()
 709                                                                 && relFile.isFile()) {
 710                                                         url = relFile.toURI().toURL();
 711                                                 }
 712                                         }
 713                                 } catch (Exception e) {
 714                                         // Should not happen since we control the correct arguments
 715                                 }
 716                         }
 717
 718                         if (url == null) {
 719                                 // try for URLs
 720                                 try {
 721                                         for (String ext : getImageExt(true)) {
 722                                                 if (Instance.getCache()
 723                                                                 .check(new URL(line + ext), true)) {
 724                                                         url = new URL(line + ext);
 725                                                         break;
 726                                                 }
 727                                         }
 728
 729                                         // try out of cache
 730                                         if (url == null) {
 731                                                 for (String ext : getImageExt(true)) {
 732                                                         try {
 733                                                                 url = new URL(line + ext);
 734                                                                 Instance.getCache().refresh(url, support, true);
 735                                                                 break;
 736                                                         } catch (IOException e) {
 737                                                                 // no image with this ext
 738                                                                 url = null;
 739                                                         }
 740                                                 }
 741                                         }
 742                                 } catch (MalformedURLException e) {
 743                                         // Not an url
 744                                 }
 745                         }
 746
 747                         // refresh the cached file
 748                         if (url != null) {
 749                                 try {
 750                                         Instance.getCache().refresh(url, support, true);
 751                                 } catch (IOException e) {
 752                                         // woops, broken image
 753                                         url = null;
 754                                 }
 755                         }
 756                 }
 757
 758                 return url;
 759         }
 760
 761         /**
 762          * Open the input file that will be used through the support.
 763          * <p>
 764          * Can return NULL, in which case you are supposed to work without an
 765          * {@link InputStream}.
 766          *
 767          * @param source
 768          *            the source {@link URL}
 769          *
 770          * @return the {@link InputStream}
 771          *
 772          * @throws IOException
 773          *             in case of I/O error
 774          */
 775         protected InputStream openInput(URL source) throws IOException {
 776                 return Instance.getCache().open(source, this, false);
 777         }
 778
 779         /**
 780          * Reset then return {@link BasicSupport_Deprecated#in}.
 781          *
 782          * @return {@link BasicSupport_Deprecated#in}
 783          */
 784         protected InputStream getInput() {
 785                 return reset(in);
 786         }
 787
 788         /**
 789          * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
 790          * and requotify them (i.e., separate them into QUOTE paragraphs and other
 791          * paragraphs (quotes or not)).
 792          *
 793          * @param para
 794          *            the paragraph to requotify (not necessarily a quote)
 795          *
 796          * @return the correctly (or so we hope) quotified paragraphs
 797          */
 798         protected List<Paragraph> requotify(Paragraph para) {
 799                 List<Paragraph> newParas = new ArrayList<Paragraph>();
 800
 801                 if (para.getType() == ParagraphType.QUOTE
 802                                 && para.getContent().length() > 2) {
 803                         String line = para.getContent();
 804                         boolean singleQ = line.startsWith("" + openQuote);
 805                         boolean doubleQ = line.startsWith("" + openDoubleQuote);
 806
 807                         // Do not try when more than one quote at a time
 808                         // (some stories are not easily readable if we do)
 809                         if (singleQ
 810                                         && line.indexOf(closeQuote, 1) < line
 811                                                         .lastIndexOf(closeQuote)) {
 812                                 newParas.add(para);
 813                                 return newParas;
 814                         }
 815                         if (doubleQ
 816                                         && line.indexOf(closeDoubleQuote, 1) < line
 817                                                         .lastIndexOf(closeDoubleQuote)) {
 818                                 newParas.add(para);
 819                                 return newParas;
 820                         }
 821                         //
 822
 823                         if (!singleQ && !doubleQ) {
 824                                 line = openDoubleQuote + line + closeDoubleQuote;
 825                                 newParas.add(new Paragraph(ParagraphType.QUOTE, line, para
 826                                                 .getWords()));
 827                         } else {
 828                                 char open = singleQ ? openQuote : openDoubleQuote;
 829                                 char close = singleQ ? closeQuote : closeDoubleQuote;
 830
 831                                 int posDot = -1;
 832                                 boolean inQuote = false;
 833                                 int i = 0;
 834                                 for (char car : line.toCharArray()) {
 835                                         if (car == open) {
 836                                                 inQuote = true;
 837                                         } else if (car == close) {
 838                                                 inQuote = false;
 839                                         } else if (car == '.' && !inQuote) {
 840                                                 posDot = i;
 841                                                 break;
 842                                         }
 843                                         i++;
 844                                 }
 845
 846                                 if (posDot >= 0) {
 847                                         String rest = line.substring(posDot + 1).trim();
 848                                         line = line.substring(0, posDot + 1).trim();
 849                                         long words = 1;
 850                                         for (char car : line.toCharArray()) {
 851                                                 if (car == ' ') {
 852                                                         words++;
 853                                                 }
 854                                         }
 855                                         newParas.add(new Paragraph(ParagraphType.QUOTE, line, words));
 856                                         if (!rest.isEmpty()) {
 857                                                 newParas.addAll(requotify(processPara(rest)));
 858                                         }
 859                                 } else {
 860                                         newParas.add(para);
 861                                 }
 862                         }
 863                 } else {
 864                         newParas.add(para);
 865                 }
 866
 867                 return newParas;
 868         }
 869
 870         /**
 871          * Process a {@link Paragraph} from a raw line of text.
 872          * <p>
 873          * Will also fix quotes and HTML encoding if needed.
 874          *
 875          * @param line
 876          *            the raw line
 877          *
 878          * @return the processed {@link Paragraph}
 879          */
 880         protected Paragraph processPara(String line) {
 881                 line = ifUnhtml(line).trim();
 882
 883                 boolean space = true;
 884                 boolean brk = true;
 885                 boolean quote = false;
 886                 boolean tentativeCloseQuote = false;
 887                 char prev = '\0';
 888                 int dashCount = 0;
 889                 long words = 1;
 890
 891                 StringBuilder builder = new StringBuilder();
 892                 for (char car : line.toCharArray()) {
 893                         if (car != '-') {
 894                                 if (dashCount > 0) {
 895                                         // dash, ndash and mdash: - – —
 896                                         // currently: always use mdash
 897                                         builder.append(dashCount == 1 ? '-' : '—');
 898                                 }
 899                                 dashCount = 0;
 900                         }
 901
 902                         if (tentativeCloseQuote) {
 903                                 tentativeCloseQuote = false;
 904                                 if (Character.isLetterOrDigit(car)) {
 905                                         builder.append("'");
 906                                 } else {
 907                                         // handle double-single quotes as double quotes
 908                                         if (prev == car) {
 909                                                 builder.append(closeDoubleQuote);
 910                                                 continue;
 911                                         }
 912
 913                                         builder.append(closeQuote);
 914                                 }
 915                         }
 916
 917                         switch (car) {
 918                         case ' ': // note: unbreakable space
 919                         case ' ':
 920                         case '\t':
 921                         case '\n': // just in case
 922                         case '\r': // just in case
 923                                 if (builder.length() > 0
 924                                                 && builder.charAt(builder.length() - 1) != ' ') {
 925                                         words++;
 926                                 }
 927                                 builder.append(' ');
 928                                 break;
 929
 930                         case '\'':
 931                                 if (space || (brk && quote)) {
 932                                         quote = true;
 933                                         // handle double-single quotes as double quotes
 934                                         if (prev == car) {
 935                                                 builder.deleteCharAt(builder.length() - 1);
 936                                                 builder.append(openDoubleQuote);
 937                                         } else {
 938                                                 builder.append(openQuote);
 939                                         }
 940                                 } else if (prev == ' ' || prev == car) {
 941                                         // handle double-single quotes as double quotes
 942                                         if (prev == car) {
 943                                                 builder.deleteCharAt(builder.length() - 1);
 944                                                 builder.append(openDoubleQuote);
 945                                         } else {
 946                                                 builder.append(openQuote);
 947                                         }
 948                                 } else {
 949                                         // it is a quote ("I'm off") or a 'quote' ("This
 950                                         // 'good' restaurant"...)
 951                                         tentativeCloseQuote = true;
 952                                 }
 953                                 break;
 954
 955                         case '"':
 956                                 if (space || (brk && quote)) {
 957                                         quote = true;
 958                                         builder.append(openDoubleQuote);
 959                                 } else if (prev == ' ') {
 960                                         builder.append(openDoubleQuote);
 961                                 } else {
 962                                         builder.append(closeDoubleQuote);
 963                                 }
 964                                 break;
 965
 966                         case '-':
 967                                 if (space) {
 968                                         quote = true;
 969                                 } else {
 970                                         dashCount++;
 971                                 }
 972                                 space = false;
 973                                 break;
 974
 975                         case '*':
 976                         case '~':
 977                         case '/':
 978                         case '\\':
 979                         case '<':
 980                         case '>':
 981                         case '=':
 982                         case '+':
 983                         case '_':
 984                         case '–':
 985                         case '—':
 986                                 space = false;
 987                                 builder.append(car);
 988                                 break;
 989
 990                         case '‘':
 991                         case '`':
 992                         case '‹':
 993                         case '﹁':
 994                         case '〈':
 995                         case '「':
 996                                 if (space || (brk && quote)) {
 997                                         quote = true;
 998                                         builder.append(openQuote);
 999                                 } else {
1000                                         // handle double-single quotes as double quotes
1001                                         if (prev == car) {
1002                                                 builder.deleteCharAt(builder.length() - 1);
1003                                                 builder.append(openDoubleQuote);
1004                                         } else {
1005                                                 builder.append(openQuote);
1006                                         }
1007                                 }
1008                                 space = false;
1009                                 brk = false;
1010                                 break;
1011
1012                         case '’':
1013                         case '›':
1014                         case '﹂':
1015                         case '〉':
1016                         case '」':
1017                                 space = false;
1018                                 brk = false;
1019                                 // handle double-single quotes as double quotes
1020                                 if (prev == car) {
1021                                         builder.deleteCharAt(builder.length() - 1);
1022                                         builder.append(closeDoubleQuote);
1023                                 } else {
1024                                         builder.append(closeQuote);
1025                                 }
1026                                 break;
1027
1028                         case '«':
1029                         case '“':
1030                         case '﹃':
1031                         case '《':
1032                         case '『':
1033                                 if (space || (brk && quote)) {
1034                                         quote = true;
1035                                         builder.append(openDoubleQuote);
1036                                 } else {
1037                                         builder.append(openDoubleQuote);
1038                                 }
1039                                 space = false;
1040                                 brk = false;
1041                                 break;
1042
1043                         case '»':
1044                         case '”':
1045                         case '﹄':
1046                         case '》':
1047                         case '』':
1048                                 space = false;
1049                                 brk = false;
1050                                 builder.append(closeDoubleQuote);
1051                                 break;
1052
1053                         default:
1054                                 space = false;
1055                                 brk = false;
1056                                 builder.append(car);
1057                                 break;
1058                         }
1059
1060                         prev = car;
1061                 }
1062
1063                 if (tentativeCloseQuote) {
1064                         tentativeCloseQuote = false;
1065                         builder.append(closeQuote);
1066                 }
1067
1068                 line = builder.toString().trim();
1069
1070                 ParagraphType type = ParagraphType.NORMAL;
1071                 if (space) {
1072                         type = ParagraphType.BLANK;
1073                 } else if (brk) {
1074                         type = ParagraphType.BREAK;
1075                 } else if (quote) {
1076                         type = ParagraphType.QUOTE;
1077                 }
1078
1079                 return new Paragraph(type, line, words);
1080         }
1081
1082         /**
1083          * Remove the HTML from the input <b>if</b>
1084          * {@link BasicSupport_Deprecated#isHtml()} is true.
1085          *
1086          * @param input
1087          *            the input
1088          *
1089          * @return the no html version if needed
1090          */
1091         private String ifUnhtml(String input) {
1092                 if (isHtml() && input != null) {
1093                         return StringUtils.unhtml(input);
1094                 }
1095
1096                 return input;
1097         }
1098
1099         /**
1100          * Reset the given {@link InputStream} and return it.
1101          *
1102          * @param in
1103          *            the {@link InputStream} to reset
1104          *
1105          * @return the same {@link InputStream} after reset
1106          */
1107         static protected InputStream reset(InputStream in) {
1108                 try {
1109                         if (in != null) {
1110                                 in.reset();
1111                         }
1112                 } catch (IOException e) {
1113                 }
1114
1115                 return in;
1116         }
1117
1118         /**
1119          * Return the first line from the given input which correspond to the given
1120          * selectors.
1121          *
1122          * @param in
1123          *            the input
1124          * @param needle
1125          *            a string that must be found inside the target line (also
1126          *            supports "^" at start to say "only if it starts with" the
1127          *            needle)
1128          * @param relativeLine
1129          *            the line to return based upon the target line position (-1 =
1130          *            the line before, 0 = the target line...)
1131          *
1132          * @return the line
1133          */
1134         static protected String getLine(InputStream in, String needle,
1135                         int relativeLine) {
1136                 return getLine(in, needle, relativeLine, true);
1137         }
1138
1139         /**
1140          * Return a line from the given input which correspond to the given
1141          * selectors.
1142          *
1143          * @param in
1144          *            the input
1145          * @param needle
1146          *            a string that must be found inside the target line (also
1147          *            supports "^" at start to say "only if it starts with" the
1148          *            needle)
1149          * @param relativeLine
1150          *            the line to return based upon the target line position (-1 =
1151          *            the line before, 0 = the target line...)
1152          * @param first
1153          *            takes the first result (as opposed to the last one, which will
1154          *            also always spend the input)
1155          *
1156          * @return the line
1157          */
1158         static protected String getLine(InputStream in, String needle,
1159                         int relativeLine, boolean first) {
1160                 String rep = null;
1161
1162                 reset(in);
1163
1164                 List<String> lines = new ArrayList<String>();
1165                 @SuppressWarnings("resource")
1166                 Scanner scan = new Scanner(in, "UTF-8");
1167                 int index = -1;
1168                 scan.useDelimiter("\\n");
1169                 while (scan.hasNext()) {
1170                         lines.add(scan.next());
1171
1172                         if (index == -1) {
1173                                 if (needle.startsWith("^")) {
1174                                         if (lines.get(lines.size() - 1).startsWith(
1175                                                         needle.substring(1))) {
1176                                                 index = lines.size() - 1;
1177                                         }
1178
1179                                 } else {
1180                                         if (lines.get(lines.size() - 1).contains(needle)) {
1181                                                 index = lines.size() - 1;
1182                                         }
1183                                 }
1184                         }
1185
1186                         if (index >= 0 && index + relativeLine < lines.size()) {
1187                                 rep = lines.get(index + relativeLine);
1188                                 if (first) {
1189                                         break;
1190                                 }
1191                         }
1192                 }
1193
1194                 return rep;
1195         }
1196
1197         /**
1198          * Return the text between the key and the endKey (and optional subKey can
1199          * be passed, in this case we will look for the key first, then take the
1200          * text between the subKey and the endKey).
1201          * <p>
1202          * Will only match the first line with the given key if more than one are
1203          * possible. Which also means that if the subKey or endKey is not found on
1204          * that line, NULL will be returned.
1205          *
1206          * @param in
1207          *            the input
1208          * @param key
1209          *            the key to match (also supports "^" at start to say
1210          *            "only if it starts with" the key)
1211          * @param subKey
1212          *            the sub key or NULL if none
1213          * @param endKey
1214          *            the end key or NULL for "up to the end"
1215          * @return the text or NULL if not found
1216          */
1217         static protected String getKeyLine(InputStream in, String key,
1218                         String subKey, String endKey) {
1219                 return getKeyText(getLine(in, key, 0), key, subKey, endKey);
1220         }
1221
1222         /**
1223          * Return the text between the key and the endKey (and optional subKey can
1224          * be passed, in this case we will look for the key first, then take the
1225          * text between the subKey and the endKey).
1226          *
1227          * @param in
1228          *            the input
1229          * @param key
1230          *            the key to match (also supports "^" at start to say
1231          *            "only if it starts with" the key)
1232          * @param subKey
1233          *            the sub key or NULL if none
1234          * @param endKey
1235          *            the end key or NULL for "up to the end"
1236          * @return the text or NULL if not found
1237          */
1238         static protected String getKeyText(String in, String key, String subKey,
1239                         String endKey) {
1240                 String result = null;
1241
1242                 String line = in;
1243                 if (line != null && line.contains(key)) {
1244                         line = line.substring(line.indexOf(key) + key.length());
1245                         if (subKey == null || subKey.isEmpty() || line.contains(subKey)) {
1246                                 if (subKey != null) {
1247                                         line = line.substring(line.indexOf(subKey)
1248                                                         + subKey.length());
1249                                 }
1250                                 if (endKey == null || line.contains(endKey)) {
1251                                         if (endKey != null) {
1252                                                 line = line.substring(0, line.indexOf(endKey));
1253                                                 result = line;
1254                                         }
1255                                 }
1256                         }
1257                 }
1258
1259                 return result;
1260         }
1261
1262         /**
1263          * Return the text between the key and the endKey (optional subKeys can be
1264          * passed, in this case we will look for the subKeys first, then take the
1265          * text between the key and the endKey).
1266          *
1267          * @param in
1268          *            the input
1269          * @param key
1270          *            the key to match
1271          * @param endKey
1272          *            the end key or NULL for "up to the end"
1273          * @param afters
1274          *            the sub-keys to find before checking for key/endKey
1275          *
1276          * @return the text or NULL if not found
1277          */
1278         static protected String getKeyTextAfter(String in, String key,
1279                         String endKey, String... afters) {
1280
1281                 if (in != null && !in.isEmpty()) {
1282                         int pos = indexOfAfter(in, 0, afters);
1283                         if (pos < 0) {
1284                                 return null;
1285                         }
1286
1287                         in = in.substring(pos);
1288                 }
1289
1290                 return getKeyText(in, key, null, endKey);
1291         }
1292
1293         /**
1294          * Return the first index after all the given "afters" have been found in
1295          * the {@link String}, or -1 if it was not possible.
1296          *
1297          * @param in
1298          *            the input
1299          * @param startAt
1300          *            start at this position in the string
1301          * @param afters
1302          *            the sub-keys to find before checking for key/endKey
1303          *
1304          * @return the text or NULL if not found
1305          */
1306         static protected int indexOfAfter(String in, int startAt, String... afters) {
1307                 int pos = -1;
1308                 if (in != null && !in.isEmpty()) {
1309                         pos = startAt;
1310                         if (afters != null) {
1311                                 for (int i = 0; pos >= 0 && i < afters.length; i++) {
1312                                         String subKey = afters[i];
1313                                         if (!subKey.isEmpty()) {
1314                                                 pos = in.indexOf(subKey, pos);
1315                                                 if (pos >= 0) {
1316                                                         pos += subKey.length();
1317                                                 }
1318                                         }
1319                                 }
1320                         }
1321                 }
1322
1323                 return pos;
1324         }
1325 }