src/be/nikiroo/fanfix/supported/BasicSupport_Deprecated.java

   1 package be.nikiroo.fanfix.supported;
   2
   3 import java.io.BufferedReader;
   4 import java.io.ByteArrayInputStream;
   5 import java.io.File;
   6 import java.io.IOException;
   7 import java.io.InputStream;
   8 import java.io.InputStreamReader;
   9 import java.net.MalformedURLException;
  10 import java.net.URL;
  11 import java.util.ArrayList;
  12 import java.util.Date;
  13 import java.util.List;
  14 import java.util.Map.Entry;
  15 import java.util.Scanner;
  16
  17 import be.nikiroo.fanfix.Instance;
  18 import be.nikiroo.fanfix.bundles.Config;
  19 import be.nikiroo.fanfix.bundles.StringId;
  20 import be.nikiroo.fanfix.data.Chapter;
  21 import be.nikiroo.fanfix.data.MetaData;
  22 import be.nikiroo.fanfix.data.Paragraph;
  23 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
  24 import be.nikiroo.fanfix.data.Story;
  25 import be.nikiroo.utils.Image;
  26 import be.nikiroo.utils.Progress;
  27 import be.nikiroo.utils.StringUtils;
  28
  29 /**
  30  * DEPRECATED: use the new Jsoup 'Node' system.
  31  * <p>
  32  * This class is the base class used by the other support classes. It can be
 * used outside of this package, and has static methods that you can use to get
  34  * access to the correct support class.
  35  * <p>
  36  * It will be used with 'resources' (usually web pages or files).
  37  *
  38  * @author niki
  39  */
  40 @Deprecated
  41 public abstract class BasicSupport_Deprecated extends BasicSupport {
        // Main input of the story currently being processed: assigned in
        // processMeta() from openInput() (which may return NULL) and closed by
        // processMeta()/process() once they are done with it
        private InputStream in;

        // quote chars, read from the translation bundle of the current Instance
        private char openQuote = Instance.getInstance().getTrans().getCharacter(StringId.OPEN_SINGLE_QUOTE);
        private char closeQuote = Instance.getInstance().getTrans().getCharacter(StringId.CLOSE_SINGLE_QUOTE);
        private char openDoubleQuote = Instance.getInstance().getTrans().getCharacter(StringId.OPEN_DOUBLE_QUOTE);
        private char closeDoubleQuote = Instance.getInstance().getTrans().getCharacter(StringId.CLOSE_DOUBLE_QUOTE);
  49
  50         // New methods not used in Deprecated mode
  51         @Override
  52         protected String getDesc() throws IOException {
  53                 throw new RuntimeException("should not be used by legacy code");
  54         }
  55
  56         @Override
  57         protected MetaData getMeta() throws IOException {
  58                 throw new RuntimeException("should not be used by legacy code");
  59         }
  60
  61         @Override
  62         protected List<Entry<String, URL>> getChapters(Progress pg)
  63                         throws IOException {
  64                 throw new RuntimeException("should not be used by legacy code");
  65         }
  66
  67         @Override
  68         protected String getChapterContent(URL chapUrl, int number, Progress pg)
  69                         throws IOException {
  70                 throw new RuntimeException("should not be used by legacy code");
  71         }
  72
  73         @Override
  74         public Story process(Progress pg) throws IOException {
  75                 return process(getSource(), pg);
  76         }
  77
  78         //
  79
        /**
         * Return the {@link MetaData} of this story.
         * <p>
         * It is called by
         * {@link #processMeta(URL, boolean, boolean, Progress)}, after
         * {@link #preprocess(URL, InputStream)}, with the canonical URL of the
         * story.
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input (the main resource) -- presumably NULL when the
         *            support has no main resource to open (to be confirmed
         *            against <tt>openInput</tt>/<tt>getInput</tt>)
         *
         * @return the associated {@link MetaData}, never NULL (the caller
         *         dereferences it without any NULL check)
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract MetaData getMeta(URL source, InputStream in)
                        throws IOException;
  95
        /**
         * Return the story description.
         * <p>
         * It is only called by
         * {@link #processMeta(URL, boolean, boolean, Progress)} when the
         * description was requested; the result is then converted into the
         * "resume" chapter (chapter number 0) of the story.
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input (the main resource)
         *
         * @return the description (a NULL value results in a resume chapter
         *         without paragraphs)
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract String getDesc(URL source, InputStream in)
                        throws IOException;
 111
        /**
         * Return the list of chapters (name and resource).
         * <p>
         * For each entry, the key is used as the chapter name and the value as
         * the resource to open to get the chapter content; a NULL value is
         * allowed (no input is then opened for this chapter).
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input (the main resource)
         * @param pg
         *            the optional progress reporter
         *
         * @return the chapters, or NULL if there are none (the story is then
         *         processed without any chapter)
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract List<Entry<String, URL>> getChapters(URL source,
                        InputStream in, Progress pg) throws IOException;
 129
        /**
         * Return the content of the chapter (possibly HTML encoded, if
         * {@link BasicSupport_Deprecated#isHtml()} is TRUE).
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input of this chapter: when called from
         *            {@link #process(URL, Progress)}, this is the stream opened
         *            on the chapter resource returned by
         *            {@link #getChapters(URL, InputStream, Progress)}, or NULL
         *            if that chapter has no resource
         * @param number
         *            the chapter number (the first chapter is number 1)
         * @param pg
         *            the optional progress reporter
         *
         * @return the content
         *
         * @throws IOException
         *             in case of I/O error
         */
        protected abstract String getChapterContent(URL source, InputStream in,
                        int number, Progress pg) throws IOException;
 150
 151         /**
 152          * Process the given story resource into a partially filled {@link Story}
 153          * object containing the name and metadata, except for the description.
 154          *
 155          * @param url
 156          *            the story resource
 157          *
 158          * @return the {@link Story}
 159          *
 160          * @throws IOException
 161          *             in case of I/O error
 162          */
 163         public Story processMeta(URL url) throws IOException {
 164                 return processMeta(url, true, false, null);
 165         }
 166
 167         /**
 168          * Process the given story resource into a partially filled {@link Story}
 169          * object containing the name and metadata.
 170          *
 171          * @param url
 172          *            the story resource
 173          * @param close
 174          *            close "this" and "in" when done
 175          * @param getDesc
 176          *            retrieve the description of the story, or not
 177          * @param pg
 178          *            the optional progress reporter
 179          *
 180          * @return the {@link Story}, never NULL
 181          *
 182          * @throws IOException
 183          *             in case of I/O error
 184          */
 185         protected Story processMeta(URL url, boolean close, boolean getDesc,
 186                         Progress pg) throws IOException {
 187                 if (pg == null) {
 188                         pg = new Progress();
 189                 } else {
 190                         pg.setMinMax(0, 100);
 191                 }
 192
 193                 login();
 194                 pg.setProgress(10);
 195
 196                 url = getCanonicalUrl(url);
 197
 198                 setCurrentReferer(url);
 199
 200                 in = openInput(url); // NULL allowed here
 201                 try {
 202                         preprocess(url, getInput());
 203                         pg.setProgress(30);
 204
 205                         Story story = new Story();
 206                         MetaData meta = getMeta(url, getInput());
 207                         if (meta.getCreationDate() == null
 208                                         || meta.getCreationDate().trim().isEmpty()) {
 209                                 meta.setCreationDate(bsHelper.formatDate(
 210                                                 StringUtils.fromTime(new Date().getTime())));
 211                         }
 212                         story.setMeta(meta);
 213                         pg.put("meta", meta);
 214
 215                         pg.setProgress(50);
 216
 217                         if (meta.getCover() == null) {
 218                                 meta.setCover(getDefaultCover(meta.getSubject()));
 219                         }
 220
 221                         pg.setProgress(60);
 222
 223                         if (getDesc) {
 224                                 String descChapterName = Instance.getInstance().getTrans().getString(StringId.DESCRIPTION);
 225                                 story.getMeta().setResume(makeChapter(url, 0, descChapterName, getDesc(url, getInput()), null));
 226                         }
 227
 228                         pg.setProgress(100);
 229                         return story;
 230                 } finally {
 231                         if (close) {
 232                                 close();
 233
 234                                 if (in != null) {
 235                                         in.close();
 236                                 }
 237                         }
 238                 }
 239         }
 240
 241         /**
 242          * Process the given story resource into a fully filled {@link Story}
 243          * object.
 244          *
 245          * @param url
 246          *            the story resource
 247          * @param pg
 248          *            the optional progress reporter
 249          *
 250          * @return the {@link Story}, never NULL
 251          *
 252          * @throws IOException
 253          *             in case of I/O error
 254          */
 255         protected Story process(URL url, Progress pg) throws IOException {
 256                 if (pg == null) {
 257                         pg = new Progress();
 258                 } else {
 259                         pg.setMinMax(0, 100);
 260                 }
 261
 262                 url = getCanonicalUrl(url);
 263                 pg.setProgress(1);
 264                 try {
 265                         Progress pgMeta = new Progress();
 266                         pg.addProgress(pgMeta, 10);
 267                         Story story = processMeta(url, false, true, pgMeta);
 268                         pg.put("meta", story.getMeta());
 269                         if (!pgMeta.isDone()) {
 270                                 pgMeta.setProgress(pgMeta.getMax()); // 10%
 271                         }
 272
 273                         setCurrentReferer(url);
 274
 275                         Progress pgGetChapters = new Progress();
 276                         pg.addProgress(pgGetChapters, 10);
 277                         story.setChapters(new ArrayList<Chapter>());
 278                         List<Entry<String, URL>> chapters = getChapters(url, getInput(),
 279                                         pgGetChapters);
 280                         if (!pgGetChapters.isDone()) {
 281                                 pgGetChapters.setProgress(pgGetChapters.getMax()); // 20%
 282                         }
 283
 284                         if (chapters != null) {
 285                                 Progress pgChaps = new Progress("Extracting chapters", 0,
 286                                                 chapters.size() * 300);
 287                                 pg.addProgress(pgChaps, 80);
 288
 289                                 long words = 0;
 290                                 int i = 1;
 291                                 for (Entry<String, URL> chap : chapters) {
 292                                         pgChaps.setName("Extracting chapter " + i);
 293                                         InputStream chapIn = null;
 294                                         if (chap.getValue() != null) {
 295                                                 setCurrentReferer(chap.getValue());
 296                                                 chapIn = Instance.getInstance().getCache().open(chap.getValue(), this, false);
 297                                         }
 298                                         pgChaps.setProgress(i * 100);
 299                                         try {
 300                                                 Progress pgGetChapterContent = new Progress();
 301                                                 Progress pgMakeChapter = new Progress();
 302                                                 pgChaps.addProgress(pgGetChapterContent, 100);
 303                                                 pgChaps.addProgress(pgMakeChapter, 100);
 304
 305                                                 String content = getChapterContent(url, chapIn, i,
 306                                                                 pgGetChapterContent);
 307                                                 if (!pgGetChapterContent.isDone()) {
 308                                                         pgGetChapterContent.setProgress(pgGetChapterContent
 309                                                                         .getMax());
 310                                                 }
 311
 312                                                 Chapter cc = makeChapter(url, i, chap.getKey(),
 313                                                                 content, pgMakeChapter);
 314                                                 if (!pgMakeChapter.isDone()) {
 315                                                         pgMakeChapter.setProgress(pgMakeChapter.getMax());
 316                                                 }
 317
 318                                                 words += cc.getWords();
 319                                                 story.getChapters().add(cc);
 320                                         } finally {
 321                                                 if (chapIn != null) {
 322                                                         chapIn.close();
 323                                                 }
 324                                         }
 325
 326                                         i++;
 327                                 }
 328
 329                                 story.getMeta().setWords(words);
 330
 331                                 pgChaps.setName("Extracting chapters");
 332                         } else {
 333                                 pg.setProgress(80);
 334                         }
 335
 336                         // Check for "no chapters" stories
 337                         if (story.getChapters().isEmpty()
 338                                         && story.getMeta().getResume() != null
 339                                         && !story.getMeta().getResume().getParagraphs().isEmpty()) {
 340                                 Chapter resume = story.getMeta().getResume();
 341                                 resume.setName("");
 342                                 resume.setNumber(1);
 343                                 story.getChapters().add(resume);
 344                                 story.getMeta().setWords(resume.getWords());
 345
 346                                 String descChapterName = Instance.getInstance().getTrans()
 347                                                 .getString(StringId.DESCRIPTION);
 348                                 resume = new Chapter(0, descChapterName);
 349                                 story.getMeta().setResume(resume);
 350                         }
 351
 352                         return story;
 353                 } finally {
 354                         close();
 355
 356                         if (in != null) {
 357                                 in.close();
 358                         }
 359                 }
 360         }
 361
        /**
         * Prepare the support if needed before processing.
         * <p>
         * It is called by
         * {@link #processMeta(URL, boolean, boolean, Progress)} once the main
         * input has been opened and before the metadata are retrieved. The
         * default implementation does nothing.
         *
         * @param source
         *            the source of the story
         * @param in
         *            the input (the main resource)
         *
         * @throws IOException
         *             on I/O error
         */
        @SuppressWarnings("unused")
        protected void preprocess(URL source, InputStream in) throws IOException {
        }
 376
 377         /**
 378          * Create a {@link Chapter} object from the given information, formatting
 379          * the content as it should be.
 380          *
 381          * @param source
 382          *            the source of the story
 383          * @param number
 384          *            the chapter number
 385          * @param name
 386          *            the chapter name
 387          * @param content
 388          *            the chapter content
 389          * @param pg
 390          *            the optional progress reporter
 391          *
 392          * @return the {@link Chapter}, never NULL
 393          *
 394          * @throws IOException
 395          *             in case of I/O error
 396          */
 397         protected Chapter makeChapter(URL source, int number, String name,
 398                         String content, Progress pg) throws IOException {
 399                 // Chapter name: process it correctly, then remove the possible
 400                 // redundant "Chapter x: " in front of it, or "-" (as in
 401                 // "Chapter 5: - Fun!" after the ": " was automatically added)
 402                 String chapterName = processPara(name).getContent().trim();
 403                 for (String lang : Instance.getInstance().getConfig().getList(Config.CONF_CHAPTER)) {
 404                         String chapterWord = Instance.getInstance().getConfig().getStringX(Config.CONF_CHAPTER, lang);
 405                         if (chapterName.startsWith(chapterWord)) {
 406                                 chapterName = chapterName.substring(chapterWord.length())
 407                                                 .trim();
 408                                 break;
 409                         }
 410                 }
 411
 412                 if (chapterName.startsWith(Integer.toString(number))) {
 413                         chapterName = chapterName.substring(
 414                                         Integer.toString(number).length()).trim();
 415                 }
 416
 417                 while (chapterName.startsWith(":") || chapterName.startsWith("-")) {
 418                         chapterName = chapterName.substring(1).trim();
 419                 }
 420                 //
 421
 422                 Chapter chap = new Chapter(number, chapterName);
 423
 424                 if (content != null) {
 425                         List<Paragraph> paras = makeParagraphs(source, content, pg);
 426                         long words = 0;
 427                         for (Paragraph para : paras) {
 428                                 words += para.getWords();
 429                         }
 430                         chap.setParagraphs(paras);
 431                         chap.setWords(words);
 432                 }
 433
 434                 return chap;
 435
 436         }
 437
 438         /**
 439          * Convert the given content into {@link Paragraph}s.
 440          *
 441          * @param source
 442          *            the source URL of the story
 443          * @param content
 444          *            the textual content
 445          * @param pg
 446          *            the optional progress reporter
 447          *
 448          * @return the {@link Paragraph}s (can be empty, but never NULL)
 449          *
 450          * @throws IOException
 451          *             in case of I/O error
 452          */
 453         protected List<Paragraph> makeParagraphs(URL source, String content,
 454                         Progress pg) throws IOException {
 455                 if (pg == null) {
 456                         pg = new Progress();
 457                 }
 458
 459                 if (isHtml()) {
 460                         // Special <HR> processing:
 461                         content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
 462                                         "<br/>* * *<br/>");
 463                 }
 464
 465                 List<Paragraph> paras = new ArrayList<Paragraph>();
 466                 if (content != null && !content.trim().isEmpty()) {
 467                         if (isHtml()) {
 468                                 String[] tab = content.split("(<p>|</p>|<br>|<br/>)");
 469                                 pg.setMinMax(0, tab.length);
 470                                 int i = 1;
 471                                 for (String line : tab) {
 472                                         if (line.startsWith("[") && line.endsWith("]")) {
 473                                                 pg.setName("Extracting image " + i);
 474                                         }
 475                                         paras.add(makeParagraph(source, line.trim()));
 476                                         pg.setProgress(i++);
 477                                 }
 478                                 pg.setName(null);
 479                         } else {
 480                                 List<String> lines = new ArrayList<String>();
 481                                 BufferedReader buff = null;
 482                                 try {
 483                                         buff = new BufferedReader(
 484                                                         new InputStreamReader(new ByteArrayInputStream(
 485                                                                         content.getBytes("UTF-8")), "UTF-8"));
 486                                         for (String line = buff.readLine(); line != null; line = buff
 487                                                         .readLine()) {
 488                                                 lines.add(line.trim());
 489                                         }
 490                                 } finally {
 491                                         if (buff != null) {
 492                                                 buff.close();
 493                                         }
 494                                 }
 495
 496                                 pg.setMinMax(0, lines.size());
 497                                 int i = 0;
 498                                 for (String line : lines) {
 499                                         if (line.startsWith("[") && line.endsWith("]")) {
 500                                                 pg.setName("Extracting image " + i);
 501                                         }
 502                                         paras.add(makeParagraph(source, line));
 503                                         pg.setProgress(i++);
 504                                 }
 505                                 pg.setName(null);
 506                         }
 507
 508                         // Check quotes for "bad" format
 509                         List<Paragraph> newParas = new ArrayList<Paragraph>();
 510                         for (Paragraph para : paras) {
 511                                 newParas.addAll(requotify(para));
 512                         }
 513                         paras = newParas;
 514
 515                         // Remove double blanks/brks
 516                         fixBlanksBreaks(paras);
 517                 }
 518
 519                 return paras;
 520         }
 521
 522         /**
 523          * Convert the given line into a single {@link Paragraph}.
 524          *
 525          * @param source
 526          *            the source URL of the story
 527          * @param line
 528          *            the textual content of the paragraph
 529          *
 530          * @return the {@link Paragraph}, never NULL
 531          */
 532         private Paragraph makeParagraph(URL source, String line) {
 533                 Image image = null;
 534                 if (line.startsWith("[") && line.endsWith("]")) {
 535                         image = getImage(this, source, line.substring(1, line.length() - 1)
 536                                         .trim());
 537                 }
 538
 539                 if (image != null) {
 540                         return new Paragraph(image);
 541                 }
 542
 543                 return processPara(line);
 544         }
 545
 546         /**
 547          * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
 548          * those {@link Paragraph}s.
 549          * <p>
 550          * The resulting list will not contain a starting or trailing blank/break
 551          * nor 2 blanks or breaks following each other.
 552          *
 553          * @param paras
 554          *            the list of {@link Paragraph}s to fix
 555          */
 556         protected void fixBlanksBreaks(List<Paragraph> paras) {
 557                 boolean space = false;
 558                 boolean brk = true;
 559                 for (int i = 0; i < paras.size(); i++) {
 560                         Paragraph para = paras.get(i);
 561                         boolean thisSpace = para.getType() == ParagraphType.BLANK;
 562                         boolean thisBrk = para.getType() == ParagraphType.BREAK;
 563
 564                         if (i > 0 && space && thisBrk) {
 565                                 paras.remove(i - 1);
 566                                 i--;
 567                         } else if ((space || brk) && (thisSpace || thisBrk)) {
 568                                 paras.remove(i);
 569                                 i--;
 570                         }
 571
 572                         space = thisSpace;
 573                         brk = thisBrk;
 574                 }
 575
 576                 // Remove blank/brk at start
 577                 if (paras.size() > 0
 578                                 && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
 579                                                 0).getType() == ParagraphType.BREAK)) {
 580                         paras.remove(0);
 581                 }
 582
 583                 // Remove blank/brk at end
 584                 int last = paras.size() - 1;
 585                 if (paras.size() > 0
 586                                 && (paras.get(last).getType() == ParagraphType.BLANK || paras
 587                                                 .get(last).getType() == ParagraphType.BREAK)) {
 588                         paras.remove(last);
 589                 }
 590         }
 591
 592         /**
 593          * Get the default cover related to this subject (see <tt>.info</tt> files).
 594          *
 595          * @param subject
 596          *            the subject
 597          *
 598          * @return the cover if any, or NULL
 599          */
 600         static Image getDefaultCover(String subject) {
 601                 if (subject != null && !subject.isEmpty() && Instance.getInstance().getCoverDir() != null) {
 602                         try {
 603                                 File fileCover = new File(Instance.getInstance().getCoverDir(), subject);
 604                                 return getImage(null, fileCover.toURI().toURL(), subject);
 605                         } catch (MalformedURLException e) {
 606                         }
 607                 }
 608
 609                 return null;
 610         }
 611
 612         /**
 613          * Return the list of supported image extensions.
 614          *
 615          * @param emptyAllowed
 616          *            TRUE to allow an empty extension on first place, which can be
 617          *            used when you may already have an extension in your input but
 618          *            are not sure about it
 619          *
 620          * @return the extensions
 621          */
 622         static String[] getImageExt(boolean emptyAllowed) {
 623                 if (emptyAllowed) {
 624                         return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 625                 }
 626
 627                 return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
 628         }
 629
 630         /**
 631          * Check if the given resource can be a local image or a remote image, then
 632          * refresh the cache with it if it is.
 633          *
 634          * @param source
 635          *            the story source
 636          * @param line
 637          *            the resource to check
 638          *
 639          * @return the image if found, or NULL
 640          *
 641          */
 642         static Image getImage(BasicSupport_Deprecated support, URL source,
 643                         String line) {
 644                 URL url = getImageUrl(support, source, line);
 645                 if (url != null) {
 646                         if ("file".equals(url.getProtocol())) {
 647                                 if (new File(url.getPath()).isDirectory()) {
 648                                         return null;
 649                                 }
 650                         }
 651                         InputStream in = null;
 652                         try {
 653                                 in = Instance.getInstance().getCache().open(url, getSupport(url), true);
 654                                 return new Image(in);
 655                         } catch (IOException e) {
 656                         } finally {
 657                                 if (in != null) {
 658                                         try {
 659                                                 in.close();
 660                                         } catch (IOException e) {
 661                                         }
 662                                 }
 663                         }
 664                 }
 665
 666                 return null;
 667         }
 668
 669         /**
 670          * Check if the given resource can be a local image or a remote image, then
 671          * refresh the cache with it if it is.
 672          *
 673          * @param source
 674          *            the story source
 675          * @param line
 676          *            the resource to check
 677          *
 678          * @return the image URL if found, or NULL
 679          *
 680          */
 681         static URL getImageUrl(BasicSupport_Deprecated support, URL source,
 682                         String line) {
 683                 URL url = null;
 684
 685                 if (line != null) {
 686                         // try for files
 687                         if (source != null) {
 688                                 try {
 689                                         String relPath = null;
 690                                         String absPath = null;
 691                                         try {
 692                                                 String path = new File(source.getFile()).getParent();
 693                                                 relPath = new File(new File(path), line.trim())
 694                                                                 .getAbsolutePath();
 695                                         } catch (Exception e) {
 696                                                 // Cannot be converted to path (one possibility to take
 697                                                 // into account: absolute path on Windows)
 698                                         }
 699                                         try {
 700                                                 absPath = new File(line.trim()).getAbsolutePath();
 701                                         } catch (Exception e) {
 702                                                 // Cannot be converted to path (at all)
 703                                         }
 704
 705                                         for (String ext : getImageExt(true)) {
 706                                                 File absFile = new File(absPath + ext);
 707                                                 File relFile = new File(relPath + ext);
 708                                                 if (absPath != null && absFile.exists()
 709                                                                 && absFile.isFile()) {
 710                                                         url = absFile.toURI().toURL();
 711                                                 } else if (relPath != null && relFile.exists()
 712                                                                 && relFile.isFile()) {
 713                                                         url = relFile.toURI().toURL();
 714                                                 }
 715                                         }
 716                                 } catch (Exception e) {
 717                                         // Should not happen since we control the correct arguments
 718                                 }
 719                         }
 720
 721                         if (url == null) {
 722                                 // try for URLs
 723                                 try {
 724                                         for (String ext : getImageExt(true)) {
 725                                                 if (Instance.getInstance().getCache().check(new URL(line + ext), true)) {
 726                                                         url = new URL(line + ext);
 727                                                         break;
 728                                                 }
 729                                         }
 730
 731                                         // try out of cache
 732                                         if (url == null) {
 733                                                 for (String ext : getImageExt(true)) {
 734                                                         try {
 735                                                                 url = new URL(line + ext);
 736                                                                 Instance.getInstance().getCache().refresh(url, support, true);
 737                                                                 break;
 738                                                         } catch (IOException e) {
 739                                                                 // no image with this ext
 740                                                                 url = null;
 741                                                         }
 742                                                 }
 743                                         }
 744                                 } catch (MalformedURLException e) {
 745                                         // Not an url
 746                                 }
 747                         }
 748
 749                         // refresh the cached file
 750                         if (url != null) {
 751                                 try {
 752                                         Instance.getInstance().getCache().refresh(url, support, true);
 753                                 } catch (IOException e) {
 754                                         // woops, broken image
 755                                         url = null;
 756                                 }
 757                         }
 758                 }
 759
 760                 return url;
 761         }
 762
 763         /**
 764          * Open the input file that will be used through the support.
 765          * <p>
 766          * Can return NULL, in which case you are supposed to work without an
 767          * {@link InputStream}.
 768          *
 769          * @param source
 770          *            the source {@link URL}
 771          *
 772          * @return the {@link InputStream}
 773          *
 774          * @throws IOException
 775          *             in case of I/O error
 776          */
 777         protected InputStream openInput(URL source) throws IOException {
 778                 return Instance.getInstance().getCache().open(source, this, false);
 779         }
 780
 781         /**
 782          * Reset then return {@link BasicSupport_Deprecated#in}.
 783          *
 784          * @return {@link BasicSupport_Deprecated#in}
 785          */
 786         protected InputStream getInput() {
 787                 return reset(in);
 788         }
 789
 790         /**
 791          * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
 792          * and requotify them (i.e., separate them into QUOTE paragraphs and other
 793          * paragraphs (quotes or not)).
 794          *
 795          * @param para
 796          *            the paragraph to requotify (not necessarily a quote)
 797          *
 798          * @return the correctly (or so we hope) quotified paragraphs
 799          */
 800         protected List<Paragraph> requotify(Paragraph para) {
 801                 List<Paragraph> newParas = new ArrayList<Paragraph>();
 802
 803                 if (para.getType() == ParagraphType.QUOTE
 804                                 && para.getContent().length() > 2) {
 805                         String line = para.getContent();
 806                         boolean singleQ = line.startsWith("" + openQuote);
 807                         boolean doubleQ = line.startsWith("" + openDoubleQuote);
 808
 809                         // Do not try when more than one quote at a time
 810                         // (some stories are not easily readable if we do)
 811                         if (singleQ
 812                                         && line.indexOf(closeQuote, 1) < line
 813                                                         .lastIndexOf(closeQuote)) {
 814                                 newParas.add(para);
 815                                 return newParas;
 816                         }
 817                         if (doubleQ
 818                                         && line.indexOf(closeDoubleQuote, 1) < line
 819                                                         .lastIndexOf(closeDoubleQuote)) {
 820                                 newParas.add(para);
 821                                 return newParas;
 822                         }
 823                         //
 824
 825                         if (!singleQ && !doubleQ) {
 826                                 line = openDoubleQuote + line + closeDoubleQuote;
 827                                 newParas.add(new Paragraph(ParagraphType.QUOTE, line, para
 828                                                 .getWords()));
 829                         } else {
 830                                 char open = singleQ ? openQuote : openDoubleQuote;
 831                                 char close = singleQ ? closeQuote : closeDoubleQuote;
 832
 833                                 int posDot = -1;
 834                                 boolean inQuote = false;
 835                                 int i = 0;
 836                                 for (char car : line.toCharArray()) {
 837                                         if (car == open) {
 838                                                 inQuote = true;
 839                                         } else if (car == close) {
 840                                                 inQuote = false;
 841                                         } else if (car == '.' && !inQuote) {
 842                                                 posDot = i;
 843                                                 break;
 844                                         }
 845                                         i++;
 846                                 }
 847
 848                                 if (posDot >= 0) {
 849                                         String rest = line.substring(posDot + 1).trim();
 850                                         line = line.substring(0, posDot + 1).trim();
 851                                         long words = 1;
 852                                         for (char car : line.toCharArray()) {
 853                                                 if (car == ' ') {
 854                                                         words++;
 855                                                 }
 856                                         }
 857                                         newParas.add(new Paragraph(ParagraphType.QUOTE, line, words));
 858                                         if (!rest.isEmpty()) {
 859                                                 newParas.addAll(requotify(processPara(rest)));
 860                                         }
 861                                 } else {
 862                                         newParas.add(para);
 863                                 }
 864                         }
 865                 } else {
 866                         newParas.add(para);
 867                 }
 868
 869                 return newParas;
 870         }
 871
 872         /**
 873          * Process a {@link Paragraph} from a raw line of text.
 874          * <p>
 875          * Will also fix quotes and HTML encoding if needed.
 876          *
 877          * @param line
 878          *            the raw line
 879          *
 880          * @return the processed {@link Paragraph}, never NULL
 881          */
 882         protected Paragraph processPara(String line) {
 883                 line = ifUnhtml(line).trim();
 884
 885                 boolean space = true;
 886                 boolean brk = true;
 887                 boolean quote = false;
 888                 boolean tentativeCloseQuote = false;
 889                 char prev = '\0';
 890                 int dashCount = 0;
 891                 long words = 1;
 892
 893                 StringBuilder builder = new StringBuilder();
 894                 for (char car : line.toCharArray()) {
 895                         if (car != '-') {
 896                                 if (dashCount > 0) {
 897                                         // dash, ndash and mdash: - – —
 898                                         // currently: always use mdash
 899                                         builder.append(dashCount == 1 ? '-' : '—');
 900                                 }
 901                                 dashCount = 0;
 902                         }
 903
 904                         if (tentativeCloseQuote) {
 905                                 tentativeCloseQuote = false;
 906                                 if (Character.isLetterOrDigit(car)) {
 907                                         builder.append("'");
 908                                 } else {
 909                                         // handle double-single quotes as double quotes
 910                                         if (prev == car) {
 911                                                 builder.append(closeDoubleQuote);
 912                                                 continue;
 913                                         }
 914
 915                                         builder.append(closeQuote);
 916                                 }
 917                         }
 918
 919                         switch (car) {
 920                         case ' ': // note: unbreakable space
 921                         case ' ':
 922                         case '\t':
 923                         case '\n': // just in case
 924                         case '\r': // just in case
 925                                 if (builder.length() > 0
 926                                                 && builder.charAt(builder.length() - 1) != ' ') {
 927                                         words++;
 928                                 }
 929                                 builder.append(' ');
 930                                 break;
 931
 932                         case '\'':
 933                                 if (space || (brk && quote)) {
 934                                         quote = true;
 935                                         // handle double-single quotes as double quotes
 936                                         if (prev == car) {
 937                                                 builder.deleteCharAt(builder.length() - 1);
 938                                                 builder.append(openDoubleQuote);
 939                                         } else {
 940                                                 builder.append(openQuote);
 941                                         }
 942                                 } else if (prev == ' ' || prev == car) {
 943                                         // handle double-single quotes as double quotes
 944                                         if (prev == car) {
 945                                                 builder.deleteCharAt(builder.length() - 1);
 946                                                 builder.append(openDoubleQuote);
 947                                         } else {
 948                                                 builder.append(openQuote);
 949                                         }
 950                                 } else {
 951                                         // it is a quote ("I'm off") or a 'quote' ("This
 952                                         // 'good' restaurant"...)
 953                                         tentativeCloseQuote = true;
 954                                 }
 955                                 break;
 956
 957                         case '"':
 958                                 if (space || (brk && quote)) {
 959                                         quote = true;
 960                                         builder.append(openDoubleQuote);
 961                                 } else if (prev == ' ') {
 962                                         builder.append(openDoubleQuote);
 963                                 } else {
 964                                         builder.append(closeDoubleQuote);
 965                                 }
 966                                 break;
 967
 968                         case '-':
 969                                 if (space) {
 970                                         quote = true;
 971                                 } else {
 972                                         dashCount++;
 973                                 }
 974                                 space = false;
 975                                 break;
 976
 977                         case '*':
 978                         case '~':
 979                         case '/':
 980                         case '\\':
 981                         case '<':
 982                         case '>':
 983                         case '=':
 984                         case '+':
 985                         case '_':
 986                         case '–':
 987                         case '—':
 988                                 space = false;
 989                                 builder.append(car);
 990                                 break;
 991
 992                         case '‘':
 993                         case '`':
 994                         case '‹':
 995                         case '﹁':
 996                         case '〈':
 997                         case '「':
 998                                 if (space || (brk && quote)) {
 999                                         quote = true;
1000                                         builder.append(openQuote);
1001                                 } else {
1002                                         // handle double-single quotes as double quotes
1003                                         if (prev == car) {
1004                                                 builder.deleteCharAt(builder.length() - 1);
1005                                                 builder.append(openDoubleQuote);
1006                                         } else {
1007                                                 builder.append(openQuote);
1008                                         }
1009                                 }
1010                                 space = false;
1011                                 brk = false;
1012                                 break;
1013
1014                         case '’':
1015                         case '›':
1016                         case '﹂':
1017                         case '〉':
1018                         case '」':
1019                                 space = false;
1020                                 brk = false;
1021                                 // handle double-single quotes as double quotes
1022                                 if (prev == car) {
1023                                         builder.deleteCharAt(builder.length() - 1);
1024                                         builder.append(closeDoubleQuote);
1025                                 } else {
1026                                         builder.append(closeQuote);
1027                                 }
1028                                 break;
1029
1030                         case '«':
1031                         case '“':
1032                         case '﹃':
1033                         case '《':
1034                         case '『':
1035                                 if (space || (brk && quote)) {
1036                                         quote = true;
1037                                         builder.append(openDoubleQuote);
1038                                 } else {
1039                                         builder.append(openDoubleQuote);
1040                                 }
1041                                 space = false;
1042                                 brk = false;
1043                                 break;
1044
1045                         case '»':
1046                         case '”':
1047                         case '﹄':
1048                         case '》':
1049                         case '』':
1050                                 space = false;
1051                                 brk = false;
1052                                 builder.append(closeDoubleQuote);
1053                                 break;
1054
1055                         default:
1056                                 space = false;
1057                                 brk = false;
1058                                 builder.append(car);
1059                                 break;
1060                         }
1061
1062                         prev = car;
1063                 }
1064
1065                 if (tentativeCloseQuote) {
1066                         tentativeCloseQuote = false;
1067                         builder.append(closeQuote);
1068                 }
1069
1070                 line = builder.toString().trim();
1071
1072                 ParagraphType type = ParagraphType.NORMAL;
1073                 if (space) {
1074                         type = ParagraphType.BLANK;
1075                 } else if (brk) {
1076                         type = ParagraphType.BREAK;
1077                 } else if (quote) {
1078                         type = ParagraphType.QUOTE;
1079                 }
1080
1081                 return new Paragraph(type, line, words);
1082         }
1083
1084         /**
1085          * Remove the HTML from the input <b>if</b>
1086          * {@link BasicSupport_Deprecated#isHtml()} is true.
1087          *
1088          * @param input
1089          *            the input
1090          *
1091          * @return the no html version if needed
1092          */
1093         private String ifUnhtml(String input) {
1094                 if (isHtml() && input != null) {
1095                         return StringUtils.unhtml(input);
1096                 }
1097
1098                 return input;
1099         }
1100
1101         /**
1102          * Reset the given {@link InputStream} and return it.
1103          *
1104          * @param in
1105          *            the {@link InputStream} to reset
1106          *
1107          * @return the same {@link InputStream} after reset
1108          */
1109         static protected InputStream reset(InputStream in) {
1110                 try {
1111                         if (in != null) {
1112                                 in.reset();
1113                         }
1114                 } catch (IOException e) {
1115                 }
1116
1117                 return in;
1118         }
1119
1120         /**
1121          * Return the first line from the given input which correspond to the given
1122          * selectors.
1123          *
1124          * @param in
1125          *            the input
1126          * @param needle
1127          *            a string that must be found inside the target line (also
1128          *            supports "^" at start to say "only if it starts with" the
1129          *            needle)
1130          * @param relativeLine
1131          *            the line to return based upon the target line position (-1 =
1132          *            the line before, 0 = the target line...)
1133          *
1134          * @return the line, or NULL if not found
1135          */
1136         static protected String getLine(InputStream in, String needle,
1137                         int relativeLine) {
1138                 return getLine(in, needle, relativeLine, true);
1139         }
1140
1141         /**
1142          * Return a line from the given input which correspond to the given
1143          * selectors.
1144          *
1145          * @param in
1146          *            the input
1147          * @param needle
1148          *            a string that must be found inside the target line (also
1149          *            supports "^" at start to say "only if it starts with" the
1150          *            needle)
1151          * @param relativeLine
1152          *            the line to return based upon the target line position (-1 =
1153          *            the line before, 0 = the target line...)
1154          * @param first
1155          *            takes the first result (as opposed to the last one, which will
1156          *            also always spend the input)
1157          *
1158          * @return the line, or NULL if not found
1159          */
1160         static protected String getLine(InputStream in, String needle,
1161                         int relativeLine, boolean first) {
1162                 String rep = null;
1163
1164                 reset(in);
1165
1166                 List<String> lines = new ArrayList<String>();
1167                 @SuppressWarnings("resource")
1168                 Scanner scan = new Scanner(in, "UTF-8");
1169                 int index = -1;
1170                 scan.useDelimiter("\\n");
1171                 while (scan.hasNext()) {
1172                         lines.add(scan.next());
1173
1174                         if (index == -1) {
1175                                 if (needle.startsWith("^")) {
1176                                         if (lines.get(lines.size() - 1).startsWith(
1177                                                         needle.substring(1))) {
1178                                                 index = lines.size() - 1;
1179                                         }
1180
1181                                 } else {
1182                                         if (lines.get(lines.size() - 1).contains(needle)) {
1183                                                 index = lines.size() - 1;
1184                                         }
1185                                 }
1186                         }
1187
1188                         if (index >= 0 && index + relativeLine < lines.size()) {
1189                                 rep = lines.get(index + relativeLine);
1190                                 if (first) {
1191                                         break;
1192                                 }
1193                         }
1194                 }
1195
1196                 return rep;
1197         }
1198
1199         /**
1200          * Return the text between the key and the endKey (and optional subKey can
1201          * be passed, in this case we will look for the key first, then take the
1202          * text between the subKey and the endKey).
1203          * <p>
1204          * Will only match the first line with the given key if more than one are
1205          * possible. Which also means that if the subKey or endKey is not found on
1206          * that line, NULL will be returned.
1207          *
1208          * @param in
1209          *            the input
1210          * @param key
1211          *            the key to match (also supports "^" at start to say
1212          *            "only if it starts with" the key)
1213          * @param subKey
1214          *            the sub key or NULL if none
1215          * @param endKey
1216          *            the end key or NULL for "up to the end"
1217          * @return the text or NULL if not found
1218          */
1219         static protected String getKeyLine(InputStream in, String key,
1220                         String subKey, String endKey) {
1221                 return getKeyText(getLine(in, key, 0), key, subKey, endKey);
1222         }
1223
1224         /**
1225          * Return the text between the key and the endKey (and optional subKey can
1226          * be passed, in this case we will look for the key first, then take the
1227          * text between the subKey and the endKey).
1228          *
1229          * @param in
1230          *            the input
1231          * @param key
1232          *            the key to match (also supports "^" at start to say
1233          *            "only if it starts with" the key)
1234          * @param subKey
1235          *            the sub key or NULL if none
1236          * @param endKey
1237          *            the end key or NULL for "up to the end"
1238          * @return the text or NULL if not found
1239          */
1240         static protected String getKeyText(String in, String key, String subKey,
1241                         String endKey) {
1242                 String result = null;
1243
1244                 String line = in;
1245                 if (line != null && line.contains(key)) {
1246                         line = line.substring(line.indexOf(key) + key.length());
1247                         if (subKey == null || subKey.isEmpty() || line.contains(subKey)) {
1248                                 if (subKey != null) {
1249                                         line = line.substring(line.indexOf(subKey)
1250                                                         + subKey.length());
1251                                 }
1252                                 if (endKey == null || line.contains(endKey)) {
1253                                         if (endKey != null) {
1254                                                 line = line.substring(0, line.indexOf(endKey));
1255                                                 result = line;
1256                                         }
1257                                 }
1258                         }
1259                 }
1260
1261                 return result;
1262         }
1263
1264         /**
1265          * Return the text between the key and the endKey (optional subKeys can be
1266          * passed, in this case we will look for the subKeys first, then take the
1267          * text between the key and the endKey).
1268          *
1269          * @param in
1270          *            the input
1271          * @param key
1272          *            the key to match
1273          * @param endKey
1274          *            the end key or NULL for "up to the end"
1275          * @param afters
1276          *            the sub-keys to find before checking for key/endKey
1277          *
1278          * @return the text or NULL if not found
1279          */
1280         static protected String getKeyTextAfter(String in, String key,
1281                         String endKey, String... afters) {
1282
1283                 if (in != null && !in.isEmpty()) {
1284                         int pos = indexOfAfter(in, 0, afters);
1285                         if (pos < 0) {
1286                                 return null;
1287                         }
1288
1289                         in = in.substring(pos);
1290                 }
1291
1292                 return getKeyText(in, key, null, endKey);
1293         }
1294
1295         /**
1296          * Return the first index after all the given "afters" have been found in
1297          * the {@link String}, or -1 if it was not possible.
1298          *
1299          * @param in
1300          *            the input
1301          * @param startAt
1302          *            start at this position in the string
1303          * @param afters
1304          *            the sub-keys to find before checking for key/endKey
1305          *
1306          * @return the text or NULL if not found
1307          */
1308         static protected int indexOfAfter(String in, int startAt, String... afters) {
1309                 int pos = -1;
1310                 if (in != null && !in.isEmpty()) {
1311                         pos = startAt;
1312                         if (afters != null) {
1313                                 for (int i = 0; pos >= 0 && i < afters.length; i++) {
1314                                         String subKey = afters[i];
1315                                         if (!subKey.isEmpty()) {
1316                                                 pos = in.indexOf(subKey, pos);
1317                                                 if (pos >= 0) {
1318                                                         pos += subKey.length();
1319                                                 }
1320                                         }
1321                                 }
1322                         }
1323                 }
1324
1325                 return pos;
1326         }
1327 }