src/be/nikiroo/fanfix/supported/BasicSupport_Deprecated.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.io.BufferedReader;
   4 import java.io.ByteArrayInputStream;
   5 import java.io.File;
   6 import java.io.IOException;
   7 import java.io.InputStream;
   8 import java.io.InputStreamReader;
   9 import java.net.MalformedURLException;
  10 import java.net.URL;
  11 import java.util.ArrayList;
  12 import java.util.Date;
  13 import java.util.List;
  14 import java.util.Map.Entry;
  15 import java.util.Scanner;
  16
  17 import be.nikiroo.fanfix.Instance;
  18 import be.nikiroo.fanfix.bundles.Config;
  19 import be.nikiroo.fanfix.bundles.StringId;
  20 import be.nikiroo.fanfix.data.Chapter;
  21 import be.nikiroo.fanfix.data.MetaData;
  22 import be.nikiroo.fanfix.data.Paragraph;
  23 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
  24 import be.nikiroo.fanfix.data.Story;
  25 import be.nikiroo.utils.Image;
  26 import be.nikiroo.utils.Progress;
  27 import be.nikiroo.utils.StringUtils;
  28
  29 /**
  30  * DEPRECATED: use the new Jsoup 'Node' system.
  31  * <p>
  32  * This class is the base class used by the other support classes. It can be
  33  * used outside of this package, and has static methods that you can use to get
  34  * access to the correct support class.
  35  * <p>
  36  * It will be used with 'resources' (usually web pages or files).
  37  *
  38  * @author niki
  39  */
  40 @Deprecated
  41 public abstract class BasicSupport_Deprecated extends BasicSupport {
  42         private InputStream in;
  43
  44         // quote chars
  45         private char openQuote = Instance.getInstance().getTrans().getCharacter(StringId.OPEN_SINGLE_QUOTE);
  46         private char closeQuote = Instance.getInstance().getTrans().getCharacter(StringId.CLOSE_SINGLE_QUOTE);
  47         private char openDoubleQuote = Instance.getInstance().getTrans().getCharacter(StringId.OPEN_DOUBLE_QUOTE);
  48         private char closeDoubleQuote = Instance.getInstance().getTrans().getCharacter(StringId.CLOSE_DOUBLE_QUOTE);
  49
  50         // New methods not used in Deprecated mode
  51         @Override
  52         protected String getDesc() throws IOException {
  53                 throw new RuntimeException("should not be used by legacy code");
  54         }
  55
  56         @Override
  57         protected MetaData getMeta() throws IOException {
  58                 throw new RuntimeException("should not be used by legacy code");
  59         }
  60
  61         @Override
  62         protected List<Entry<String, URL>> getChapters(Progress pg)
  63                         throws IOException {
  64                 throw new RuntimeException("should not be used by legacy code");
  65         }
  66
  67         @Override
  68         protected String getChapterContent(URL chapUrl, int number, Progress pg)
  69                         throws IOException {
  70                 throw new RuntimeException("should not be used by legacy code");
  71         }
  72
  73         @Override
  74         public Story process(Progress pg) throws IOException {
  75                 return process(getSource(), pg);
  76         }
  77
  78         //
  79
  80         /**
  81          * Return the {@link MetaData} of this story.
  82          *
  83          * @param source
  84          *            the source of the story
  85          * @param in
  86          *            the input (the main resource)
  87          *
  88          * @return the associated {@link MetaData}, never NULL
  89          *
  90          * @throws IOException
  91          *             in case of I/O error
  92          */
  93         protected abstract MetaData getMeta(URL source, InputStream in)
  94                         throws IOException;
  95
  96         /**
  97          * Return the story description.
  98          *
  99          * @param source
 100          *            the source of the story
 101          * @param in
 102          *            the input (the main resource)
 103          *
 104          * @return the description
 105          *
 106          * @throws IOException
 107          *             in case of I/O error
 108          */
 109         protected abstract String getDesc(URL source, InputStream in)
 110                         throws IOException;
 111
 112         /**
 113          * Return the list of chapters (name and resource).
 114          *
 115          * @param source
 116          *            the source of the story
 117          * @param in
 118          *            the input (the main resource)
 119          * @param pg
 120          *            the optional progress reporter
 121          *
 122          * @return the chapters
 123          *
 124          * @throws IOException
 125          *             in case of I/O error
 126          */
 127         protected abstract List<Entry<String, URL>> getChapters(URL source,
 128                         InputStream in, Progress pg) throws IOException;
 129
 130         /**
 131          * Return the content of the chapter (possibly HTML encoded, if
 132          * {@link BasicSupport_Deprecated#isHtml()} is TRUE).
 133          *
 134          * @param source
 135          *            the source of the story
 136          * @param in
 137          *            the input (the main resource)
 138          * @param number
 139          *            the chapter number
 140          * @param pg
 141          *            the optional progress reporter
 142          *
 143          * @return the content
 144          *
 145          * @throws IOException
 146          *             in case of I/O error
 147          */
 148         protected abstract String getChapterContent(URL source, InputStream in,
 149                         int number, Progress pg) throws IOException;
 150
 151         /**
 152          * Process the given story resource into a partially filled {@link Story}
 153          * object containing the name and metadata, except for the description.
 154          *
 155          * @param url
 156          *            the story resource
 157          *
 158          * @return the {@link Story}
 159          *
 160          * @throws IOException
 161          *             in case of I/O error
 162          */
 163         public Story processMeta(URL url) throws IOException {
 164                 return processMeta(url, true, false, null);
 165         }
 166
 167         /**
 168          * Process the given story resource into a partially filled {@link Story}
 169          * object containing the name and metadata.
 170          *
 171          * @param url
 172          *            the story resource
 173          * @param close
 174          *            close "this" and "in" when done
 175          * @param getDesc
 176          *            retrieve the description of the story, or not
 177          * @param pg
 178          *            the optional progress reporter
 179          *
 180          * @return the {@link Story}, never NULL
 181          *
 182          * @throws IOException
 183          *             in case of I/O error
 184          */
 185         protected Story processMeta(URL url, boolean close, boolean getDesc,
 186                         Progress pg) throws IOException {
 187                 if (pg == null) {
 188                         pg = new Progress();
 189                 } else {
 190                         pg.setMinMax(0, 100);
 191                 }
 192
 193                 login();
 194                 pg.setProgress(10);
 195
 196                 url = getCanonicalUrl(url);
 197
 198                 setCurrentReferer(url);
 199
 200                 in = openInput(url); // NULL allowed here
 201                 try {
 202                         preprocess(url, getInput());
 203                         pg.setProgress(30);
 204
 205                         Story story = new Story();
 206
 207                         MetaData meta = getMeta(url, getInput());
 208                         meta.setType(getType().toString());
 209                         meta.setSource(getType().getSourceName());
 210                         if (meta.getPublisher() == null) {
 211                                 meta.setPublisher(getType().getSourceName());
 212                         }
 213
 214                         if (meta.getCreationDate() == null
 215                                         || meta.getCreationDate().trim().isEmpty()) {
 216                                 meta.setCreationDate(bsHelper.formatDate(
 217                                                 StringUtils.fromTime(new Date().getTime())));
 218                         }
 219                         story.setMeta(meta);
 220                         pg.put("meta", meta);
 221
 222                         pg.setProgress(50);
 223
 224                         if (meta.getCover() == null) {
 225                                 meta.setCover(getDefaultCover(meta.getSubject()));
 226                         }
 227
 228                         pg.setProgress(60);
 229
 230                         if (getDesc) {
 231                                 String descChapterName = Instance.getInstance().getTrans().getString(StringId.DESCRIPTION);
 232                                 story.getMeta().setResume(makeChapter(url, 0, descChapterName, getDesc(url, getInput()), null));
 233                         }
 234
 235                         pg.setProgress(100);
 236                         return story;
 237                 } finally {
 238                         if (close) {
 239                                 close();
 240
 241                                 if (in != null) {
 242                                         in.close();
 243                                 }
 244                         }
 245                 }
 246         }
 247
 248         /**
 249          * Process the given story resource into a fully filled {@link Story}
 250          * object.
 251          *
 252          * @param url
 253          *            the story resource
 254          * @param pg
 255          *            the optional progress reporter
 256          *
 257          * @return the {@link Story}, never NULL
 258          *
 259          * @throws IOException
 260          *             in case of I/O error
 261          */
 262         protected Story process(URL url, Progress pg) throws IOException {
 263                 if (pg == null) {
 264                         pg = new Progress();
 265                 } else {
 266                         pg.setMinMax(0, 100);
 267                 }
 268
 269                 url = getCanonicalUrl(url);
 270                 pg.setProgress(1);
 271                 try {
 272                         Progress pgMeta = new Progress();
 273                         pg.addProgress(pgMeta, 10);
 274                         Story story = processMeta(url, false, true, pgMeta);
 275                         pg.put("meta", story.getMeta());
 276                         if (!pgMeta.isDone()) {
 277                                 pgMeta.setProgress(pgMeta.getMax()); // 10%
 278                         }
 279
 280                         setCurrentReferer(url);
 281
 282                         Progress pgGetChapters = new Progress();
 283                         pg.addProgress(pgGetChapters, 10);
 284                         story.setChapters(new ArrayList<Chapter>());
 285                         List<Entry<String, URL>> chapters = getChapters(url, getInput(),
 286                                         pgGetChapters);
 287                         if (!pgGetChapters.isDone()) {
 288                                 pgGetChapters.setProgress(pgGetChapters.getMax()); // 20%
 289                         }
 290
 291                         if (chapters != null) {
 292                                 Progress pgChaps = new Progress("Extracting chapters", 0,
 293                                                 chapters.size() * 300);
 294                                 pg.addProgress(pgChaps, 80);
 295
 296                                 long words = 0;
 297                                 int i = 1;
 298                                 for (Entry<String, URL> chap : chapters) {
 299                                         pgChaps.setName("Extracting chapter " + i);
 300                                         InputStream chapIn = null;
 301                                         if (chap.getValue() != null) {
 302                                                 setCurrentReferer(chap.getValue());
 303                                                 chapIn = Instance.getInstance().getCache().open(chap.getValue(), this, false);
 304                                         }
 305                                         pgChaps.setProgress(i * 100);
 306                                         try {
 307                                                 Progress pgGetChapterContent = new Progress();
 308                                                 Progress pgMakeChapter = new Progress();
 309                                                 pgChaps.addProgress(pgGetChapterContent, 100);
 310                                                 pgChaps.addProgress(pgMakeChapter, 100);
 311
 312                                                 String content = getChapterContent(url, chapIn, i,
 313                                                                 pgGetChapterContent);
 314                                                 if (!pgGetChapterContent.isDone()) {
 315                                                         pgGetChapterContent.setProgress(pgGetChapterContent
 316                                                                         .getMax());
 317                                                 }
 318
 319                                                 Chapter cc = makeChapter(url, i, chap.getKey(),
 320                                                                 content, pgMakeChapter);
 321                                                 if (!pgMakeChapter.isDone()) {
 322                                                         pgMakeChapter.setProgress(pgMakeChapter.getMax());
 323                                                 }
 324
 325                                                 words += cc.getWords();
 326                                                 story.getChapters().add(cc);
 327                                         } finally {
 328                                                 if (chapIn != null) {
 329                                                         chapIn.close();
 330                                                 }
 331                                         }
 332
 333                                         i++;
 334                                 }
 335
 336                                 story.getMeta().setWords(words);
 337
 338                                 pgChaps.setName("Extracting chapters");
 339                         } else {
 340                                 pg.setProgress(80);
 341                         }
 342
 343                         // Check for "no chapters" stories
 344                         if (story.getChapters().isEmpty()
 345                                         && story.getMeta().getResume() != null
 346                                         && !story.getMeta().getResume().getParagraphs().isEmpty()) {
 347                                 Chapter resume = story.getMeta().getResume();
 348                                 resume.setName("");
 349                                 resume.setNumber(1);
 350                                 story.getChapters().add(resume);
 351                                 story.getMeta().setWords(resume.getWords());
 352
 353                                 String descChapterName = Instance.getInstance().getTrans()
 354                                                 .getString(StringId.DESCRIPTION);
 355                                 resume = new Chapter(0, descChapterName);
 356                                 story.getMeta().setResume(resume);
 357                         }
 358
 359                         return story;
 360                 } finally {
 361                         close();
 362
 363                         if (in != null) {
 364                                 in.close();
 365                         }
 366                 }
 367         }
 368
 369         /**
 370          * Prepare the support if needed before processing.
 371          *
 372          * @param source
 373          *            the source of the story
 374          * @param in
 375          *            the input (the main resource)
 376          *
 377          * @throws IOException
 378          *             on I/O error
 379          */
 380         @SuppressWarnings("unused")
 381         protected void preprocess(URL source, InputStream in) throws IOException {
 382         }
 383
 384         /**
 385          * Create a {@link Chapter} object from the given information, formatting
 386          * the content as it should be.
 387          *
 388          * @param source
 389          *            the source of the story
 390          * @param number
 391          *            the chapter number
 392          * @param name
 393          *            the chapter name
 394          * @param content
 395          *            the chapter content
 396          * @param pg
 397          *            the optional progress reporter
 398          *
 399          * @return the {@link Chapter}, never NULL
 400          *
 401          * @throws IOException
 402          *             in case of I/O error
 403          */
 404         protected Chapter makeChapter(URL source, int number, String name,
 405                         String content, Progress pg) throws IOException {
 406                 // Chapter name: process it correctly, then remove the possible
 407                 // redundant "Chapter x: " in front of it, or "-" (as in
 408                 // "Chapter 5: - Fun!" after the ": " was automatically added)
 409                 String chapterName = processPara(name).getContent().trim();
 410                 for (String lang : Instance.getInstance().getConfig().getList(Config.CONF_CHAPTER)) {
 411                         String chapterWord = Instance.getInstance().getConfig().getStringX(Config.CONF_CHAPTER, lang);
 412                         if (chapterName.startsWith(chapterWord)) {
 413                                 chapterName = chapterName.substring(chapterWord.length())
 414                                                 .trim();
 415                                 break;
 416                         }
 417                 }
 418
 419                 if (chapterName.startsWith(Integer.toString(number))) {
 420                         chapterName = chapterName.substring(
 421                                         Integer.toString(number).length()).trim();
 422                 }
 423
 424                 while (chapterName.startsWith(":") || chapterName.startsWith("-")) {
 425                         chapterName = chapterName.substring(1).trim();
 426                 }
 427                 //
 428
 429                 Chapter chap = new Chapter(number, chapterName);
 430
 431                 if (content != null) {
 432                         List<Paragraph> paras = makeParagraphs(source, content, pg);
 433                         long words = 0;
 434                         for (Paragraph para : paras) {
 435                                 words += para.getWords();
 436                         }
 437                         chap.setParagraphs(paras);
 438                         chap.setWords(words);
 439                 }
 440
 441                 return chap;
 442
 443         }
 444
 445         /**
 446          * Convert the given content into {@link Paragraph}s.
 447          *
 448          * @param source
 449          *            the source URL of the story
 450          * @param content
 451          *            the textual content
 452          * @param pg
 453          *            the optional progress reporter
 454          *
 455          * @return the {@link Paragraph}s (can be empty, but never NULL)
 456          *
 457          * @throws IOException
 458          *             in case of I/O error
 459          */
 460         protected List<Paragraph> makeParagraphs(URL source, String content,
 461                         Progress pg) throws IOException {
 462                 if (pg == null) {
 463                         pg = new Progress();
 464                 }
 465
 466                 if (isHtml()) {
 467                         // Special <HR> processing:
 468                         content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
 469                                         "<br/>* * *<br/>");
 470                 }
 471
 472                 List<Paragraph> paras = new ArrayList<Paragraph>();
 473                 if (content != null && !content.trim().isEmpty()) {
 474                         if (isHtml()) {
 475                                 String[] tab = content.split("(<p>|</p>|<br>|<br/>)");
 476                                 pg.setMinMax(0, tab.length);
 477                                 int i = 1;
 478                                 for (String line : tab) {
 479                                         if (line.startsWith("[") && line.endsWith("]")) {
 480                                                 pg.setName("Extracting image " + i);
 481                                         }
 482                                         paras.add(makeParagraph(source, line.trim()));
 483                                         pg.setProgress(i++);
 484                                 }
 485                                 pg.setName(null);
 486                         } else {
 487                                 List<String> lines = new ArrayList<String>();
 488                                 BufferedReader buff = null;
 489                                 try {
 490                                         buff = new BufferedReader(
 491                                                         new InputStreamReader(new ByteArrayInputStream(
 492                                                                         content.getBytes("UTF-8")), "UTF-8"));
 493                                         for (String line = buff.readLine(); line != null; line = buff
 494                                                         .readLine()) {
 495                                                 lines.add(line.trim());
 496                                         }
 497                                 } finally {
 498                                         if (buff != null) {
 499                                                 buff.close();
 500                                         }
 501                                 }
 502
 503                                 pg.setMinMax(0, lines.size());
 504                                 int i = 0;
 505                                 for (String line : lines) {
 506                                         if (line.startsWith("[") && line.endsWith("]")) {
 507                                                 pg.setName("Extracting image " + i);
 508                                         }
 509                                         paras.add(makeParagraph(source, line));
 510                                         pg.setProgress(i++);
 511                                 }
 512                                 pg.setName(null);
 513                         }
 514
 515                         // Check quotes for "bad" format
 516                         List<Paragraph> newParas = new ArrayList<Paragraph>();
 517                         for (Paragraph para : paras) {
 518                                 newParas.addAll(requotify(para));
 519                         }
 520                         paras = newParas;
 521
 522                         // Remove double blanks/brks
 523                         fixBlanksBreaks(paras);
 524                 }
 525
 526                 return paras;
 527         }
 528
 529         /**
 530          * Convert the given line into a single {@link Paragraph}.
 531          *
 532          * @param source
 533          *            the source URL of the story
 534          * @param line
 535          *            the textual content of the paragraph
 536          *
 537          * @return the {@link Paragraph}, never NULL
 538          */
 539         private Paragraph makeParagraph(URL source, String line) {
 540                 Image image = null;
 541                 if (line.startsWith("[") && line.endsWith("]")) {
 542                         image = getImage(this, source, line.substring(1, line.length() - 1)
 543                                         .trim());
 544                 }
 545
 546                 if (image != null) {
 547                         return new Paragraph(image);
 548                 }
 549
 550                 return processPara(line);
 551         }
 552
 553         /**
 554          * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
 555          * those {@link Paragraph}s.
 556          * <p>
 557          * The resulting list will not contain a starting or trailing blank/break
 558          * nor 2 blanks or breaks following each other.
 559          *
 560          * @param paras
 561          *            the list of {@link Paragraph}s to fix
 562          */
 563         protected void fixBlanksBreaks(List<Paragraph> paras) {
 564                 boolean space = false;
 565                 boolean brk = true;
 566                 for (int i = 0; i < paras.size(); i++) {
 567                         Paragraph para = paras.get(i);
 568                         boolean thisSpace = para.getType() == ParagraphType.BLANK;
 569                         boolean thisBrk = para.getType() == ParagraphType.BREAK;
 570
 571                         if (i > 0 && space && thisBrk) {
 572                                 paras.remove(i - 1);
 573                                 i--;
 574                         } else if ((space || brk) && (thisSpace || thisBrk)) {
 575                                 paras.remove(i);
 576                                 i--;
 577                         }
 578
 579                         space = thisSpace;
 580                         brk = thisBrk;
 581                 }
 582
 583                 // Remove blank/brk at start
 584                 if (paras.size() > 0
 585                                 && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
 586                                                 0).getType() == ParagraphType.BREAK)) {
 587                         paras.remove(0);
 588                 }
 589
 590                 // Remove blank/brk at end
 591                 int last = paras.size() - 1;
 592                 if (paras.size() > 0
 593                                 && (paras.get(last).getType() == ParagraphType.BLANK || paras
 594                                                 .get(last).getType() == ParagraphType.BREAK)) {
 595                         paras.remove(last);
 596                 }
 597         }
 598
 599         /**
 600          * Get the default cover related to this subject (see <tt>.info</tt> files).
 601          *
 602          * @param subject
 603          *            the subject
 604          *
 605          * @return the cover if any, or NULL
 606          */
 607         static Image getDefaultCover(String subject) {
 608                 if (subject != null && !subject.isEmpty() && Instance.getInstance().getCoverDir() != null) {
 609                         try {
 610                                 File fileCover = new File(Instance.getInstance().getCoverDir(), subject);
 611                                 return getImage(null, fileCover.toURI().toURL(), subject);
 612                         } catch (MalformedURLException e) {
 613                         }
 614                 }
 615
 616                 return null;
 617         }
 618
 619         /**
 620          * Return the list of supported image extensions.
 621          *
 622          * @param emptyAllowed
 623          *            TRUE to allow an empty extension on first place, which can be
 624          *            used when you may already have an extension in your input but
 625          *            are not sure about it
 626          *
 627          * @return the extensions
 628          */
 629         static String[] getImageExt(boolean emptyAllowed) {
 630                 if (emptyAllowed) {
 631                         return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 632                 }
 633
 634                 return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 635         }
 636
 637         /**
 638          * Check if the given resource can be a local image or a remote image, then
 639          * refresh the cache with it if it is.
 640          *
 641          * @param source
 642          *            the story source
 643          * @param line
 644          *            the resource to check
 645          *
 646          * @return the image if found, or NULL
 647          *
 648          */
 649         static Image getImage(BasicSupport_Deprecated support, URL source,
 650                         String line) {
 651                 URL url = getImageUrl(support, source, line);
 652                 if (url != null) {
 653                         if ("file".equals(url.getProtocol())) {
 654                                 if (new File(url.getPath()).isDirectory()) {
 655                                         return null;
 656                                 }
 657                         }
 658                         InputStream in = null;
 659                         try {
 660                                 in = Instance.getInstance().getCache().open(url, getSupport(url), true);
 661                                 Image img = new Image(in);
 662                                 if (img.getSize() == 0) {
 663                                         img.close();
 664                                         throw new IOException(
 665                                                         "Empty image not accepted");
 666                                 }
 667                                 return img;
 668                         } catch (IOException e) {
 669                         } finally {
 670                                 if (in != null) {
 671                                         try {
 672                                                 in.close();
 673                                         } catch (IOException e) {
 674                                         }
 675                                 }
 676                         }
 677                 }
 678
 679                 return null;
 680         }
 681
 682         /**
 683          * Check if the given resource can be a local image or a remote image, then
 684          * refresh the cache with it if it is.
 685          *
 686          * @param source
 687          *            the story source
 688          * @param line
 689          *            the resource to check
 690          *
 691          * @return the image URL if found, or NULL
 692          *
 693          */
 694         static URL getImageUrl(BasicSupport_Deprecated support, URL source,
 695                         String line) {
 696                 URL url = null;
 697
 698                 if (line != null) {
 699                         // try for files
 700                         if (source != null) {
 701                                 try {
 702                                         String relPath = null;
 703                                         String absPath = null;
 704                                         try {
 705                                                 String path = new File(source.getFile()).getParent();
 706                                                 relPath = new File(new File(path), line.trim())
 707                                                                 .getAbsolutePath();
 708                                         } catch (Exception e) {
 709                                                 // Cannot be converted to path (one possibility to take
 710                                                 // into account: absolute path on Windows)
 711                                         }
 712                                         try {
 713                                                 absPath = new File(line.trim()).getAbsolutePath();
 714                                         } catch (Exception e) {
 715                                                 // Cannot be converted to path (at all)
 716                                         }
 717
 718                                         for (String ext : getImageExt(true)) {
 719                                                 File absFile = new File(absPath + ext);
 720                                                 File relFile = new File(relPath + ext);
 721                                                 if (absPath != null && absFile.exists()
 722                                                                 && absFile.isFile()) {
 723                                                         url = absFile.toURI().toURL();
 724                                                 } else if (relPath != null && relFile.exists()
 725                                                                 && relFile.isFile()) {
 726                                                         url = relFile.toURI().toURL();
 727                                                 }
 728                                         }
 729                                 } catch (Exception e) {
 730                                         // Should not happen since we control the correct arguments
 731                                 }
 732                         }
 733
 734                         if (url == null) {
 735                                 // try for URLs
 736                                 try {
 737                                         for (String ext : getImageExt(true)) {
 738                                                 if (Instance.getInstance().getCache().check(new URL(line + ext), true)) {
 739                                                         url = new URL(line + ext);
 740                                                         break;
 741                                                 }
 742                                         }
 743
 744                                         // try out of cache
 745                                         if (url == null) {
 746                                                 for (String ext : getImageExt(true)) {
 747                                                         try {
 748                                                                 url = new URL(line + ext);
 749                                                                 Instance.getInstance().getCache().refresh(url, support, true);
 750                                                                 break;
 751                                                         } catch (IOException e) {
 752                                                                 // no image with this ext
 753                                                                 url = null;
 754                                                         }
 755                                                 }
 756                                         }
 757                                 } catch (MalformedURLException e) {
 758                                         // Not an url
 759                                 }
 760                         }
 761
 762                         // refresh the cached file
 763                         if (url != null) {
 764                                 try {
 765                                         Instance.getInstance().getCache().refresh(url, support, true);
 766                                 } catch (IOException e) {
 767                                         // woops, broken image
 768                                         url = null;
 769                                 }
 770                         }
 771                 }
 772
 773                 return url;
 774         }
 775
 776         /**
 777          * Open the input file that will be used through the support.
 778          * <p>
 779          * Can return NULL, in which case you are supposed to work without an
 780          * {@link InputStream}.
 781          *
 782          * @param source
 783          *            the source {@link URL}
 784          *
 785          * @return the {@link InputStream}
 786          *
 787          * @throws IOException
 788          *             in case of I/O error
 789          */
 790         protected InputStream openInput(URL source) throws IOException {
 791                 return Instance.getInstance().getCache().open(source, this, false);
 792         }
 793
 794         /**
 795          * Reset then return {@link BasicSupport_Deprecated#in}.
 796          *
 797          * @return {@link BasicSupport_Deprecated#in}
 798          */
 799         protected InputStream getInput() {
 800                 return reset(in);
 801         }
 802
 803         /**
 804          * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
 805          * and requotify them (i.e., separate them into QUOTE paragraphs and other
 806          * paragraphs (quotes or not)).
 807          *
 808          * @param para
 809          *            the paragraph to requotify (not necessarily a quote)
 810          *
 811          * @return the correctly (or so we hope) quotified paragraphs
 812          */
 813         protected List<Paragraph> requotify(Paragraph para) {
 814                 List<Paragraph> newParas = new ArrayList<Paragraph>();
 815
 816                 if (para.getType() == ParagraphType.QUOTE
 817                                 && para.getContent().length() > 2) {
 818                         String line = para.getContent();
 819                         boolean singleQ = line.startsWith("" + openQuote);
 820                         boolean doubleQ = line.startsWith("" + openDoubleQuote);
 821
 822                         // Do not try when more than one quote at a time
 823                         // (some stories are not easily readable if we do)
 824                         if (singleQ
 825                                         && line.indexOf(closeQuote, 1) < line
 826                                                         .lastIndexOf(closeQuote)) {
 827                                 newParas.add(para);
 828                                 return newParas;
 829                         }
 830                         if (doubleQ
 831                                         && line.indexOf(closeDoubleQuote, 1) < line
 832                                                         .lastIndexOf(closeDoubleQuote)) {
 833                                 newParas.add(para);
 834                                 return newParas;
 835                         }
 836                         //
 837
 838                         if (!singleQ && !doubleQ) {
 839                                 line = openDoubleQuote + line + closeDoubleQuote;
 840                                 newParas.add(new Paragraph(ParagraphType.QUOTE, line, para
 841                                                 .getWords()));
 842                         } else {
 843                                 char open = singleQ ? openQuote : openDoubleQuote;
 844                                 char close = singleQ ? closeQuote : closeDoubleQuote;
 845
 846                                 int posDot = -1;
 847                                 boolean inQuote = false;
 848                                 int i = 0;
 849                                 for (char car : line.toCharArray()) {
 850                                         if (car == open) {
 851                                                 inQuote = true;
 852                                         } else if (car == close) {
 853                                                 inQuote = false;
 854                                         } else if (car == '.' && !inQuote) {
 855                                                 posDot = i;
 856                                                 break;
 857                                         }
 858                                         i++;
 859                                 }
 860
 861                                 if (posDot >= 0) {
 862                                         String rest = line.substring(posDot + 1).trim();
 863                                         line = line.substring(0, posDot + 1).trim();
 864                                         long words = 1;
 865                                         for (char car : line.toCharArray()) {
 866                                                 if (car == ' ') {
 867                                                         words++;
 868                                                 }
 869                                         }
 870                                         newParas.add(new Paragraph(ParagraphType.QUOTE, line, words));
 871                                         if (!rest.isEmpty()) {
 872                                                 newParas.addAll(requotify(processPara(rest)));
 873                                         }
 874                                 } else {
 875                                         newParas.add(para);
 876                                 }
 877                         }
 878                 } else {
 879                         newParas.add(para);
 880                 }
 881
 882                 return newParas;
 883         }
 884
 885         /**
 886          * Process a {@link Paragraph} from a raw line of text.
 887          * <p>
 888          * Will also fix quotes and HTML encoding if needed.
 889          *
 890          * @param line
 891          *            the raw line
 892          *
 893          * @return the processed {@link Paragraph}, never NULL
 894          */
 895         protected Paragraph processPara(String line) {
 896                 line = ifUnhtml(line).trim();
 897
 898                 boolean space = true;
 899                 boolean brk = true;
 900                 boolean quote = false;
 901                 boolean tentativeCloseQuote = false;
 902                 char prev = '\0';
 903                 int dashCount = 0;
 904                 long words = 1;
 905
 906                 StringBuilder builder = new StringBuilder();
 907                 for (char car : line.toCharArray()) {
 908                         if (car != '-') {
 909                                 if (dashCount > 0) {
 910                                         // dash, ndash and mdash: - – —
 911                                         // currently: always use mdash
 912                                         builder.append(dashCount == 1 ? '-' : '—');
 913                                 }
 914                                 dashCount = 0;
 915                         }
 916
 917                         if (tentativeCloseQuote) {
 918                                 tentativeCloseQuote = false;
 919                                 if (Character.isLetterOrDigit(car)) {
 920                                         builder.append("'");
 921                                 } else {
 922                                         // handle double-single quotes as double quotes
 923                                         if (prev == car) {
 924                                                 builder.append(closeDoubleQuote);
 925                                                 continue;
 926                                         }
 927
 928                                         builder.append(closeQuote);
 929                                 }
 930                         }
 931
 932                         switch (car) {
 933                         case ' ': // note: unbreakable space
 934                         case ' ':
 935                         case '\t':
 936                         case '\n': // just in case
 937                         case '\r': // just in case
 938                                 if (builder.length() > 0
 939                                                 && builder.charAt(builder.length() - 1) != ' ') {
 940                                         words++;
 941                                 }
 942                                 builder.append(' ');
 943                                 break;
 944
 945                         case '\'':
 946                                 if (space || (brk && quote)) {
 947                                         quote = true;
 948                                         // handle double-single quotes as double quotes
 949                                         if (prev == car) {
 950                                                 builder.deleteCharAt(builder.length() - 1);
 951                                                 builder.append(openDoubleQuote);
 952                                         } else {
 953                                                 builder.append(openQuote);
 954                                         }
 955                                 } else if (prev == ' ' || prev == car) {
 956                                         // handle double-single quotes as double quotes
 957                                         if (prev == car) {
 958                                                 builder.deleteCharAt(builder.length() - 1);
 959                                                 builder.append(openDoubleQuote);
 960                                         } else {
 961                                                 builder.append(openQuote);
 962                                         }
 963                                 } else {
 964                                         // it is a quote ("I'm off") or a 'quote' ("This
 965                                         // 'good' restaurant"...)
 966                                         tentativeCloseQuote = true;
 967                                 }
 968                                 break;
 969
 970                         case '"':
 971                                 if (space || (brk && quote)) {
 972                                         quote = true;
 973                                         builder.append(openDoubleQuote);
 974                                 } else if (prev == ' ') {
 975                                         builder.append(openDoubleQuote);
 976                                 } else {
 977                                         builder.append(closeDoubleQuote);
 978                                 }
 979                                 break;
 980
 981                         case '-':
 982                                 if (space) {
 983                                         quote = true;
 984                                 } else {
 985                                         dashCount++;
 986                                 }
 987                                 space = false;
 988                                 break;
 989
 990                         case '*':
 991                         case '~':
 992                         case '/':
 993                         case '\\':
 994                         case '<':
 995                         case '>':
 996                         case '=':
 997                         case '+':
 998                         case '_':
 999                         case '–':
1000                         case '—':
1001                                 space = false;
1002                                 builder.append(car);
1003                                 break;
1004
1005                         case '‘':
1006                         case '`':
1007                         case '‹':
1008                         case '﹁':
1009                         case '〈':
1010                         case '「':
1011                                 if (space || (brk && quote)) {
1012                                         quote = true;
1013                                         builder.append(openQuote);
1014                                 } else {
1015                                         // handle double-single quotes as double quotes
1016                                         if (prev == car) {
1017                                                 builder.deleteCharAt(builder.length() - 1);
1018                                                 builder.append(openDoubleQuote);
1019                                         } else {
1020                                                 builder.append(openQuote);
1021                                         }
1022                                 }
1023                                 space = false;
1024                                 brk = false;
1025                                 break;
1026
1027                         case '’':
1028                         case '›':
1029                         case '﹂':
1030                         case '〉':
1031                         case '」':
1032                                 space = false;
1033                                 brk = false;
1034                                 // handle double-single quotes as double quotes
1035                                 if (prev == car) {
1036                                         builder.deleteCharAt(builder.length() - 1);
1037                                         builder.append(closeDoubleQuote);
1038                                 } else {
1039                                         builder.append(closeQuote);
1040                                 }
1041                                 break;
1042
1043                         case '«':
1044                         case '“':
1045                         case '﹃':
1046                         case '《':
1047                         case '『':
1048                                 if (space || (brk && quote)) {
1049                                         quote = true;
1050                                         builder.append(openDoubleQuote);
1051                                 } else {
1052                                         builder.append(openDoubleQuote);
1053                                 }
1054                                 space = false;
1055                                 brk = false;
1056                                 break;
1057
1058                         case '»':
1059                         case '”':
1060                         case '﹄':
1061                         case '》':
1062                         case '』':
1063                                 space = false;
1064                                 brk = false;
1065                                 builder.append(closeDoubleQuote);
1066                                 break;
1067
1068                         default:
1069                                 space = false;
1070                                 brk = false;
1071                                 builder.append(car);
1072                                 break;
1073                         }
1074
1075                         prev = car;
1076                 }
1077
1078                 if (tentativeCloseQuote) {
1079                         tentativeCloseQuote = false;
1080                         builder.append(closeQuote);
1081                 }
1082
1083                 line = builder.toString().trim();
1084
1085                 ParagraphType type = ParagraphType.NORMAL;
1086                 if (space) {
1087                         type = ParagraphType.BLANK;
1088                 } else if (brk) {
1089                         type = ParagraphType.BREAK;
1090                 } else if (quote) {
1091                         type = ParagraphType.QUOTE;
1092                 }
1093
1094                 return new Paragraph(type, line, words);
1095         }
1096
1097         /**
1098          * Remove the HTML from the input <b>if</b>
1099          * {@link BasicSupport_Deprecated#isHtml()} is true.
1100          *
1101          * @param input
1102          *            the input
1103          *
1104          * @return the no html version if needed
1105          */
1106         private String ifUnhtml(String input) {
1107                 if (isHtml() && input != null) {
1108                         return StringUtils.unhtml(input);
1109                 }
1110
1111                 return input;
1112         }
1113
1114         /**
1115          * Reset the given {@link InputStream} and return it.
1116          *
1117          * @param in
1118          *            the {@link InputStream} to reset
1119          *
1120          * @return the same {@link InputStream} after reset
1121          */
1122         static protected InputStream reset(InputStream in) {
1123                 try {
1124                         if (in != null) {
1125                                 in.reset();
1126                         }
1127                 } catch (IOException e) {
1128                 }
1129
1130                 return in;
1131         }
1132
1133         /**
1134          * Return the first line from the given input which correspond to the given
1135          * selectors.
1136          *
1137          * @param in
1138          *            the input
1139          * @param needle
1140          *            a string that must be found inside the target line (also
1141          *            supports "^" at start to say "only if it starts with" the
1142          *            needle)
1143          * @param relativeLine
1144          *            the line to return based upon the target line position (-1 =
1145          *            the line before, 0 = the target line...)
1146          *
1147          * @return the line, or NULL if not found
1148          */
1149         static protected String getLine(InputStream in, String needle,
1150                         int relativeLine) {
1151                 return getLine(in, needle, relativeLine, true);
1152         }
1153
1154         /**
1155          * Return a line from the given input which correspond to the given
1156          * selectors.
1157          *
1158          * @param in
1159          *            the input
1160          * @param needle
1161          *            a string that must be found inside the target line (also
1162          *            supports "^" at start to say "only if it starts with" the
1163          *            needle)
1164          * @param relativeLine
1165          *            the line to return based upon the target line position (-1 =
1166          *            the line before, 0 = the target line...)
1167          * @param first
1168          *            takes the first result (as opposed to the last one, which will
1169          *            also always spend the input)
1170          *
1171          * @return the line, or NULL if not found
1172          */
1173         static protected String getLine(InputStream in, String needle,
1174                         int relativeLine, boolean first) {
1175                 String rep = null;
1176
1177                 reset(in);
1178
1179                 List<String> lines = new ArrayList<String>();
1180                 @SuppressWarnings("resource")
1181                 Scanner scan = new Scanner(in, "UTF-8");
1182                 int index = -1;
1183                 scan.useDelimiter("\\n");
1184                 while (scan.hasNext()) {
1185                         lines.add(scan.next());
1186
1187                         if (index == -1) {
1188                                 if (needle.startsWith("^")) {
1189                                         if (lines.get(lines.size() - 1).startsWith(
1190                                                         needle.substring(1))) {
1191                                                 index = lines.size() - 1;
1192                                         }
1193
1194                                 } else {
1195                                         if (lines.get(lines.size() - 1).contains(needle)) {
1196                                                 index = lines.size() - 1;
1197                                         }
1198                                 }
1199                         }
1200
1201                         if (index >= 0 && index + relativeLine < lines.size()) {
1202                                 rep = lines.get(index + relativeLine);
1203                                 if (first) {
1204                                         break;
1205                                 }
1206                         }
1207                 }
1208
1209                 return rep;
1210         }
1211
1212         /**
1213          * Return the text between the key and the endKey (and optional subKey can
1214          * be passed, in this case we will look for the key first, then take the
1215          * text between the subKey and the endKey).
1216          * <p>
1217          * Will only match the first line with the given key if more than one are
1218          * possible. Which also means that if the subKey or endKey is not found on
1219          * that line, NULL will be returned.
1220          *
1221          * @param in
1222          *            the input
1223          * @param key
1224          *            the key to match (also supports "^" at start to say
1225          *            "only if it starts with" the key)
1226          * @param subKey
1227          *            the sub key or NULL if none
1228          * @param endKey
1229          *            the end key or NULL for "up to the end"
1230          * @return the text or NULL if not found
1231          */
1232         static protected String getKeyLine(InputStream in, String key,
1233                         String subKey, String endKey) {
1234                 return getKeyText(getLine(in, key, 0), key, subKey, endKey);
1235         }
1236
1237         /**
1238          * Return the text between the key and the endKey (and optional subKey can
1239          * be passed, in this case we will look for the key first, then take the
1240          * text between the subKey and the endKey).
1241          *
1242          * @param in
1243          *            the input
1244          * @param key
1245          *            the key to match (also supports "^" at start to say
1246          *            "only if it starts with" the key)
1247          * @param subKey
1248          *            the sub key or NULL if none
1249          * @param endKey
1250          *            the end key or NULL for "up to the end"
1251          * @return the text or NULL if not found
1252          */
1253         static protected String getKeyText(String in, String key, String subKey,
1254                         String endKey) {
1255                 String result = null;
1256
1257                 String line = in;
1258                 if (line != null && line.contains(key)) {
1259                         line = line.substring(line.indexOf(key) + key.length());
1260                         if (subKey == null || subKey.isEmpty() || line.contains(subKey)) {
1261                                 if (subKey != null) {
1262                                         line = line.substring(line.indexOf(subKey)
1263                                                         + subKey.length());
1264                                 }
1265                                 if (endKey == null || line.contains(endKey)) {
1266                                         if (endKey != null) {
1267                                                 line = line.substring(0, line.indexOf(endKey));
1268                                                 result = line;
1269                                         }
1270                                 }
1271                         }
1272                 }
1273
1274                 return result;
1275         }
1276
1277         /**
1278          * Return the text between the key and the endKey (optional subKeys can be
1279          * passed, in this case we will look for the subKeys first, then take the
1280          * text between the key and the endKey).
1281          *
1282          * @param in
1283          *            the input
1284          * @param key
1285          *            the key to match
1286          * @param endKey
1287          *            the end key or NULL for "up to the end"
1288          * @param afters
1289          *            the sub-keys to find before checking for key/endKey
1290          *
1291          * @return the text or NULL if not found
1292          */
1293         static protected String getKeyTextAfter(String in, String key,
1294                         String endKey, String... afters) {
1295
1296                 if (in != null && !in.isEmpty()) {
1297                         int pos = indexOfAfter(in, 0, afters);
1298                         if (pos < 0) {
1299                                 return null;
1300                         }
1301
1302                         in = in.substring(pos);
1303                 }
1304
1305                 return getKeyText(in, key, null, endKey);
1306         }
1307
1308         /**
1309          * Return the first index after all the given "afters" have been found in
1310          * the {@link String}, or -1 if it was not possible.
1311          *
1312          * @param in
1313          *            the input
1314          * @param startAt
1315          *            start at this position in the string
1316          * @param afters
1317          *            the sub-keys to find before checking for key/endKey
1318          *
1319          * @return the text or NULL if not found
1320          */
1321         static protected int indexOfAfter(String in, int startAt, String... afters) {
1322                 int pos = -1;
1323                 if (in != null && !in.isEmpty()) {
1324                         pos = startAt;
1325                         if (afters != null) {
1326                                 for (int i = 0; pos >= 0 && i < afters.length; i++) {
1327                                         String subKey = afters[i];
1328                                         if (!subKey.isEmpty()) {
1329                                                 pos = in.indexOf(subKey, pos);
1330                                                 if (pos >= 0) {
1331                                                         pos += subKey.length();
1332                                                 }
1333                                         }
1334                                 }
1335                         }
1336                 }
1337
1338                 return pos;
1339         }
1340 }