UI: progress bars fixes, update nikiroo-utils
[fanfix.git] / src / be / nikiroo / fanfix / supported / BasicSupport.java
CommitLineData
08fe2e33
NR
1package be.nikiroo.fanfix.supported;
2
68686a37 3import java.awt.image.BufferedImage;
68e370a4 4import java.io.BufferedReader;
08fe2e33
NR
5import java.io.ByteArrayInputStream;
6import java.io.File;
7import java.io.IOException;
8import java.io.InputStream;
68e370a4 9import java.io.InputStreamReader;
08fe2e33
NR
10import java.net.MalformedURLException;
11import java.net.URL;
08fe2e33 12import java.util.ArrayList;
793f1071 13import java.util.Date;
08fe2e33
NR
14import java.util.HashMap;
15import java.util.List;
16import java.util.Map;
17import java.util.Map.Entry;
18import java.util.Scanner;
19
20import be.nikiroo.fanfix.Instance;
21import be.nikiroo.fanfix.bundles.Config;
22import be.nikiroo.fanfix.bundles.StringId;
23import be.nikiroo.fanfix.data.Chapter;
24import be.nikiroo.fanfix.data.MetaData;
25import be.nikiroo.fanfix.data.Paragraph;
08fe2e33 26import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
9252c65e 27import be.nikiroo.fanfix.data.Story;
595dfa7a 28import be.nikiroo.utils.IOUtils;
3b2b638f 29import be.nikiroo.utils.Progress;
08fe2e33
NR
30import be.nikiroo.utils.StringUtils;
31
32/**
33 * This class is the base class used by the other support classes. It can be
34 * used outside of this package, and has static methods that you can use to
35 * get access to the correct support class.
36 * <p>
37 * It will be used with 'resources' (usually web pages or files).
38 *
39 * @author niki
40 */
41public abstract class BasicSupport {
42 /**
43 * The supported input types for which we can get a {@link BasicSupport}
44 * object.
45 *
46 * @author niki
47 */
48 public enum SupportType {
49 /** EPUB files created with this program */
50 EPUB,
51 /** Pure text file with some rules */
52 TEXT,
53 /** TEXT but with associated .info file */
54 INFO_TEXT,
55 /** My Little Pony fanfictions */
56 FIMFICTION,
57 /** Fanfictions from a lot of different universes */
58 FANFICTION,
59 /** Website with lots of Mangas */
60 MANGAFOX,
61 /** Furry website with comics support */
62 E621,
a4143cd7
NR
63 /** Furry website with stories */
64 YIFFSTAR,
08fe2e33 65 /** CBZ files */
373da363
NR
66 CBZ,
67 /** HTML files */
68 HTML;
08fe2e33
NR
69
70 /**
71 * A description of this support type (more information than the
72 * {@link BasicSupport#getSourceName()}).
73 *
74 * @return the description
75 */
76 public String getDesc() {
77 String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
78 this.name());
79
80 if (desc == null) {
81 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
82 }
83
84 return desc;
85 }
86
87 /**
88 * The name of this support type (a short version).
89 *
90 * @return the name
91 */
92 public String getSourceName() {
93 BasicSupport support = BasicSupport.getSupport(this);
94 if (support != null) {
95 return support.getSourceName();
96 }
97
98 return null;
99 }
100
101 @Override
102 public String toString() {
103 return super.toString().toLowerCase();
104 }
105
106 /**
107 * Call {@link SupportType#valueOf(String.toUpperCase())}.
108 *
109 * @param typeName
110 * the possible type name
111 *
112 * @return NULL or the type
113 */
114 public static SupportType valueOfUC(String typeName) {
115 return SupportType.valueOf(typeName == null ? null : typeName
116 .toUpperCase());
117 }
118
119 /**
120 * Call {@link SupportType#valueOf(String.toUpperCase())} but return
121 * NULL for NULL instead of raising exception.
122 *
123 * @param typeName
124 * the possible type name
125 *
126 * @return NULL or the type
127 */
128 public static SupportType valueOfNullOkUC(String typeName) {
129 if (typeName == null) {
130 return null;
131 }
132
133 return SupportType.valueOfUC(typeName);
134 }
135
136 /**
137 * Call {@link SupportType#valueOf(String.toUpperCase())} but return
138 * NULL in case of error instead of raising an exception.
139 *
140 * @param typeName
141 * the possible type name
142 *
143 * @return NULL or the type
144 */
145 public static SupportType valueOfAllOkUC(String typeName) {
146 try {
147 return SupportType.valueOfUC(typeName);
148 } catch (Exception e) {
149 return null;
150 }
151 }
152 }
153
08fe2e33
NR
154 private InputStream in;
155 private SupportType type;
22848428 156 private URL currentReferer; // with only one 'r', as in 'HTTP'...
08fe2e33
NR
157
158 // quote chars
159 private char openQuote = Instance.getTrans().getChar(
160 StringId.OPEN_SINGLE_QUOTE);
161 private char closeQuote = Instance.getTrans().getChar(
162 StringId.CLOSE_SINGLE_QUOTE);
163 private char openDoubleQuote = Instance.getTrans().getChar(
164 StringId.OPEN_DOUBLE_QUOTE);
165 private char closeDoubleQuote = Instance.getTrans().getChar(
166 StringId.CLOSE_DOUBLE_QUOTE);
167
168 /**
169 * The name of this support class.
170 *
171 * @return the name
172 */
173 protected abstract String getSourceName();
174
175 /**
176 * Check if the given resource is supported by this {@link BasicSupport}.
177 *
178 * @param url
179 * the resource to check for
180 *
181 * @return TRUE if it is
182 */
183 protected abstract boolean supports(URL url);
184
185 /**
186 * Return TRUE if the support will return HTML encoded content values for
187 * the chapters content.
188 *
189 * @return TRUE for HTML
190 */
191 protected abstract boolean isHtml();
192
68686a37 193 protected abstract MetaData getMeta(URL source, InputStream in)
08fe2e33
NR
194 throws IOException;
195
196 /**
197 * Return the story description.
198 *
199 * @param source
200 * the source of the story
201 * @param in
202 * the input (the main resource)
203 *
204 * @return the description
205 *
206 * @throws IOException
207 * in case of I/O error
208 */
209 protected abstract String getDesc(URL source, InputStream in)
210 throws IOException;
211
08fe2e33
NR
212 /**
213 * Return the list of chapters (name and resource).
214 *
215 * @param source
216 * the source of the story
217 * @param in
218 * the input (the main resource)
219 *
220 * @return the chapters
221 *
222 * @throws IOException
223 * in case of I/O error
224 */
225 protected abstract List<Entry<String, URL>> getChapters(URL source,
226 InputStream in) throws IOException;
227
228 /**
229 * Return the content of the chapter (possibly HTML encoded, if
230 * {@link BasicSupport#isHtml()} is TRUE).
231 *
232 * @param source
233 * the source of the story
234 * @param in
235 * the input (the main resource)
236 * @param number
237 * the chapter number
238 *
239 * @return the content
240 *
241 * @throws IOException
242 * in case of I/O error
243 */
244 protected abstract String getChapterContent(URL source, InputStream in,
245 int number) throws IOException;
246
6e06d2cc
NR
247 /**
248 * Log into the support (can be a no-op depending upon the support).
249 *
250 * @throws IOException
251 * in case of I/O error
252 */
253 public void login() throws IOException {
254
255 }
256
08fe2e33
NR
257 /**
258 * Return the list of cookies (values included) that must be used to
259 * correctly fetch the resources.
260 * <p>
261 * You are expected to call the super method implementation if you override
262 * it.
263 *
264 * @return the cookies
6e06d2cc
NR
265 *
266 * @throws IOException
267 * in case of I/O error
08fe2e33 268 */
6e06d2cc 269 public Map<String, String> getCookies() throws IOException {
08fe2e33
NR
270 return new HashMap<String, String>();
271 }
272
a4143cd7
NR
273 /**
274 * Return the canonical form of the main {@link URL}.
275 *
276 * @param source
277 * the source {@link URL}
278 *
279 * @return the canonical form of this {@link URL}
280 *
281 * @throws IOException
282 * in case of I/O error
283 */
284 public URL getCanonicalUrl(URL source) throws IOException {
285 return source;
286 }
287
08fe2e33
NR
288 /**
289 * Process the given story resource into a partially filled {@link Story}
290 * object containing the name and metadata, except for the description.
291 *
292 * @param url
293 * the story resource
294 *
295 * @return the {@link Story}
296 *
297 * @throws IOException
298 * in case of I/O error
299 */
300 public Story processMeta(URL url) throws IOException {
301 return processMeta(url, true, false);
302 }
303
304 /**
305 * Process the given story resource into a partially filled {@link Story}
306 * object containing the name and metadata.
307 *
308 * @param url
309 * the story resource
310 *
311 * @param close
312 * close "this" and "in" when done
313 *
314 * @return the {@link Story}
315 *
316 * @throws IOException
317 * in case of I/O error
318 */
319 protected Story processMeta(URL url, boolean close, boolean getDesc)
320 throws IOException {
6e06d2cc
NR
321 login();
322
a4143cd7
NR
323 url = getCanonicalUrl(url);
324
325 setCurrentReferer(url);
326
373da363 327 in = openInput(url);
08fe2e33
NR
328 if (in == null) {
329 return null;
330 }
331
332 try {
68686a37 333 preprocess(url, getInput());
08fe2e33
NR
334
335 Story story = new Story();
68686a37 336 MetaData meta = getMeta(url, getInput());
793f1071
NR
337 if (meta.getCreationDate() == null
338 || meta.getCreationDate().isEmpty()) {
339 meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
340 }
68686a37
NR
341 story.setMeta(meta);
342
343 if (meta != null && meta.getCover() == null) {
344 meta.setCover(getDefaultCover(meta.getSubject()));
345 }
08fe2e33
NR
346
347 if (getDesc) {
348 String descChapterName = Instance.getTrans().getString(
349 StringId.DESCRIPTION);
350 story.getMeta().setResume(
351 makeChapter(url, 0, descChapterName,
352 getDesc(url, getInput())));
353 }
354
355 return story;
356 } finally {
357 if (close) {
358 try {
359 close();
360 } catch (IOException e) {
361 Instance.syserr(e);
362 }
363
364 if (in != null) {
365 in.close();
366 }
367 }
a4143cd7
NR
368
369 setCurrentReferer(null);
08fe2e33
NR
370 }
371 }
372
373 /**
374 * Process the given story resource into a fully filled {@link Story}
375 * object.
376 *
377 * @param url
378 * the story resource
92fb0719
NR
379 * @param pg
380 * the optional progress reporter
08fe2e33
NR
381 *
382 * @return the {@link Story}
383 *
384 * @throws IOException
385 * in case of I/O error
386 */
92fb0719
NR
387 public Story process(URL url, Progress pg) throws IOException {
388 if (pg == null) {
389 pg = new Progress();
390 } else {
391 pg.setMinMax(0, 100);
392 }
393
a4143cd7 394 url = getCanonicalUrl(url);
92fb0719 395 pg.setProgress(1);
08fe2e33
NR
396 try {
397 Story story = processMeta(url, false, true);
92fb0719 398 pg.setProgress(10);
08fe2e33 399 if (story == null) {
92fb0719 400 pg.setProgress(100);
08fe2e33
NR
401 return null;
402 }
403
754a5bc2
NR
404 pg.setName("Retrieving " + story.getMeta().getTitle());
405
a4143cd7
NR
406 setCurrentReferer(url);
407
08fe2e33
NR
408 story.setChapters(new ArrayList<Chapter>());
409
08fe2e33 410 List<Entry<String, URL>> chapters = getChapters(url, getInput());
92fb0719
NR
411 pg.setProgress(20);
412
08fe2e33
NR
413 int i = 1;
414 if (chapters != null) {
92fb0719
NR
415 Progress pgChaps = new Progress(0, chapters.size());
416 pg.addProgress(pgChaps, 80);
417
793f1071 418 long words = 0;
08fe2e33
NR
419 for (Entry<String, URL> chap : chapters) {
420 setCurrentReferer(chap.getValue());
421 InputStream chapIn = Instance.getCache().open(
422 chap.getValue(), this, true);
423 try {
793f1071
NR
424 Chapter cc = makeChapter(url, i, chap.getKey(),
425 getChapterContent(url, chapIn, i));
426 words += cc.getWords();
427 story.getChapters().add(cc);
428 if (story.getMeta() != null) {
429 story.getMeta().setWords(words);
430 }
08fe2e33
NR
431 } finally {
432 chapIn.close();
433 }
a6395bef 434
3b2b638f 435 pgChaps.setProgress(i++);
08fe2e33 436 }
92fb0719
NR
437 } else {
438 pg.setProgress(100);
08fe2e33
NR
439 }
440
441 return story;
442
443 } finally {
444 try {
445 close();
446 } catch (IOException e) {
447 Instance.syserr(e);
448 }
449
450 if (in != null) {
451 in.close();
452 }
453
a4143cd7 454 setCurrentReferer(null);
08fe2e33
NR
455 }
456 }
457
458 /**
a4143cd7 459 * The support type.
08fe2e33
NR
460 *
461 * @return the type
462 */
463 public SupportType getType() {
464 return type;
465 }
466
467 /**
468 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
469 * the current {@link URL} we work on.
470 *
471 * @return the referer
472 */
473 public URL getCurrentReferer() {
474 return currentReferer;
475 }
476
477 /**
478 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
479 * the current {@link URL} we work on.
480 *
481 * @param currentReferer
482 * the new referer
483 */
484 protected void setCurrentReferer(URL currentReferer) {
485 this.currentReferer = currentReferer;
486 }
487
488 /**
489 * The support type.
490 *
491 * @param type
492 * the new type
493 *
494 * @return this
495 */
496 protected BasicSupport setType(SupportType type) {
497 this.type = type;
498 return this;
499 }
500
501 /**
68686a37 502 * Prepare the support if needed before processing.
08fe2e33
NR
503 *
504 * @param source
505 * the source of the story
506 * @param in
507 * the input (the main resource)
508 *
08fe2e33
NR
509 * @throws IOException
510 * on I/O error
511 */
68686a37 512 protected void preprocess(URL source, InputStream in) throws IOException {
08fe2e33
NR
513 }
514
515 /**
516 * Now that we have processed the {@link Story}, close the resources if any.
517 *
518 * @throws IOException
519 * on I/O error
520 */
521 protected void close() throws IOException {
522 }
523
524 /**
525 * Create a {@link Chapter} object from the given information, formatting
526 * the content as it should be.
527 *
528 * @param number
529 * the chapter number
530 * @param name
531 * the chapter name
532 * @param content
533 * the chapter content
534 *
535 * @return the {@link Chapter}
536 *
537 * @throws IOException
538 * in case of I/O error
539 */
540 protected Chapter makeChapter(URL source, int number, String name,
541 String content) throws IOException {
08fe2e33
NR
542 // Chapter name: process it correctly, then remove the possible
543 // redundant "Chapter x: " in front of it
544 String chapterName = processPara(name).getContent().trim();
545 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
546 .split(",")) {
547 String chapterWord = Instance.getConfig().getStringX(
548 Config.CHAPTER, lang);
549 if (chapterName.startsWith(chapterWord)) {
550 chapterName = chapterName.substring(chapterWord.length())
551 .trim();
552 break;
553 }
554 }
555
556 if (chapterName.startsWith(Integer.toString(number))) {
557 chapterName = chapterName.substring(
558 Integer.toString(number).length()).trim();
559 }
560
561 if (chapterName.startsWith(":")) {
562 chapterName = chapterName.substring(1).trim();
563 }
564 //
565
566 Chapter chap = new Chapter(number, chapterName);
567
68e370a4 568 if (content != null) {
793f1071
NR
569 List<Paragraph> paras = makeParagraphs(source, content);
570 long words = 0;
571 for (Paragraph para : paras) {
572 words += para.getWords();
573 }
574 chap.setParagraphs(paras);
575 chap.setWords(words);
08fe2e33
NR
576 }
577
68e370a4
NR
578 return chap;
579
580 }
581
582 /**
583 * Convert the given content into {@link Paragraph}s.
584 *
585 * @param source
586 * the source URL of the story
587 * @param content
588 * the textual content
589 *
590 * @return the {@link Paragraph}s
591 *
592 * @throws IOException
593 * in case of I/O error
594 */
595 protected List<Paragraph> makeParagraphs(URL source, String content)
596 throws IOException {
08fe2e33
NR
597 if (isHtml()) {
598 // Special <HR> processing:
599 content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
600 "\n* * *\n");
601 }
602
68e370a4 603 List<Paragraph> paras = new ArrayList<Paragraph>();
9252c65e 604 InputStream in = new ByteArrayInputStream(content.getBytes("UTF-8"));
08fe2e33 605 try {
68e370a4
NR
606 BufferedReader buff = new BufferedReader(new InputStreamReader(in,
607 "UTF-8"));
608
609 for (String encodedLine = buff.readLine(); encodedLine != null; encodedLine = buff
610 .readLine()) {
611 String lines[];
612 if (isHtml()) {
613 lines = encodedLine.split("(<p>|</p>|<br>|<br/>|\\n)");
614 } else {
615 lines = new String[] { encodedLine };
08fe2e33
NR
616 }
617
68e370a4
NR
618 for (String aline : lines) {
619 String line = aline.trim();
620
621 URL image = null;
622 if (line.startsWith("[") && line.endsWith("]")) {
623 image = getImageUrl(this, source,
624 line.substring(1, line.length() - 1).trim());
625 }
626
627 if (image != null) {
628 paras.add(new Paragraph(image));
629 } else {
630 paras.add(processPara(line));
631 }
08fe2e33
NR
632 }
633 }
68e370a4
NR
634 } finally {
635 in.close();
636 }
08fe2e33 637
68e370a4
NR
638 // Check quotes for "bad" format
639 List<Paragraph> newParas = new ArrayList<Paragraph>();
640 for (Paragraph para : paras) {
641 newParas.addAll(requotify(para));
642 }
643 paras = newParas;
08fe2e33 644
68e370a4
NR
645 // Remove double blanks/brks
646 fixBlanksBreaks(paras);
08fe2e33 647
68e370a4
NR
648 return paras;
649 }
08fe2e33 650
68e370a4
NR
651 /**
652 * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
653 * those {@link Paragraph}s.
654 * <p>
655 * The resulting list will not contain a starting or trailing blank/break
656 * nor 2 blanks or breaks following each other.
657 *
658 * @param paras
659 * the list of {@link Paragraph}s to fix
660 */
661 protected void fixBlanksBreaks(List<Paragraph> paras) {
662 boolean space = false;
663 boolean brk = true;
664 for (int i = 0; i < paras.size(); i++) {
665 Paragraph para = paras.get(i);
666 boolean thisSpace = para.getType() == ParagraphType.BLANK;
667 boolean thisBrk = para.getType() == ParagraphType.BREAK;
668
669 if (i > 0 && space && thisBrk) {
670 paras.remove(i - 1);
671 i--;
672 } else if ((space || brk) && (thisSpace || thisBrk)) {
673 paras.remove(i);
674 i--;
08fe2e33
NR
675 }
676
68e370a4
NR
677 space = thisSpace;
678 brk = thisBrk;
679 }
08fe2e33 680
68e370a4
NR
681 // Remove blank/brk at start
682 if (paras.size() > 0
683 && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
684 0).getType() == ParagraphType.BREAK)) {
685 paras.remove(0);
686 }
687
688 // Remove blank/brk at end
689 int last = paras.size() - 1;
690 if (paras.size() > 0
691 && (paras.get(last).getType() == ParagraphType.BLANK || paras
692 .get(last).getType() == ParagraphType.BREAK)) {
693 paras.remove(last);
08fe2e33
NR
694 }
695 }
696
68e370a4
NR
697 /**
698 * Get the default cover related to this subject (see <tt>.info</tt> files).
699 *
700 * @param subject
701 * the subject
702 *
703 * @return the cover if any, or NULL
704 */
68686a37
NR
705 static BufferedImage getDefaultCover(String subject) {
706 if (subject != null && !subject.isEmpty()
707 && Instance.getCoverDir() != null) {
708 try {
709 File fileCover = new File(Instance.getCoverDir(), subject);
333f0e7b 710 return getImage(null, fileCover.toURI().toURL(), subject);
68686a37
NR
711 } catch (MalformedURLException e) {
712 }
713 }
714
715 return null;
716 }
717
08fe2e33
NR
718 /**
719 * Return the list of supported image extensions.
720 *
a4143cd7
NR
721 * @param emptyAllowed
722 * TRUE to allow an empty extension on first place, which can be
723 * used when you may already have an extension in your input but
724 * are not sure about it
725 *
08fe2e33
NR
726 * @return the extensions
727 */
68686a37 728 static String[] getImageExt(boolean emptyAllowed) {
08fe2e33
NR
729 if (emptyAllowed) {
730 return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
731 } else {
732 return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
733 }
734 }
735
a4143cd7
NR
736 /**
737 * Check if the given resource can be a local image or a remote image, then
738 * refresh the cache with it if it is.
739 *
740 * @param source
741 * the story source
742 * @param line
743 * the resource to check
744 *
745 * @return the image if found, or NULL
746 *
747 */
333f0e7b
NR
748 static BufferedImage getImage(BasicSupport support, URL source, String line) {
749 URL url = getImageUrl(support, source, line);
68686a37
NR
750 if (url != null) {
751 InputStream in = null;
752 try {
753 in = Instance.getCache().open(url, getSupport(url), true);
595dfa7a 754 return IOUtils.toImage(in);
68686a37
NR
755 } catch (IOException e) {
756 } finally {
757 if (in != null) {
758 try {
759 in.close();
760 } catch (IOException e) {
761 }
762 }
763 }
764 }
765
766 return null;
767 }
768
08fe2e33
NR
769 /**
770 * Check if the given resource can be a local image or a remote image, then
771 * refresh the cache with it if it is.
772 *
773 * @param source
774 * the story source
775 * @param line
776 * the resource to check
777 *
778 * @return the image URL if found, or NULL
779 *
780 */
333f0e7b 781 static URL getImageUrl(BasicSupport support, URL source, String line) {
08fe2e33
NR
782 URL url = null;
783
68686a37
NR
784 if (line != null) {
785 // try for files
786 String path = null;
787 if (source != null) {
788 path = new File(source.getFile()).getParent();
789 try {
333f0e7b
NR
790 String basePath = new File(new File(path), line.trim())
791 .getAbsolutePath();
68686a37 792 for (String ext : getImageExt(true)) {
333f0e7b
NR
793 if (new File(basePath + ext).exists()) {
794 url = new File(basePath + ext).toURI().toURL();
68686a37 795 }
08fe2e33 796 }
68686a37
NR
797 } catch (Exception e) {
798 // Nothing to do here
08fe2e33 799 }
68686a37 800 }
08fe2e33 801
68686a37
NR
802 if (url == null) {
803 // try for URLs
804 try {
08fe2e33 805 for (String ext : getImageExt(true)) {
68686a37 806 if (Instance.getCache().check(new URL(line + ext))) {
08fe2e33 807 url = new URL(line + ext);
333f0e7b 808 break;
08fe2e33
NR
809 }
810 }
68686a37
NR
811
812 // try out of cache
813 if (url == null) {
814 for (String ext : getImageExt(true)) {
815 try {
816 url = new URL(line + ext);
333f0e7b 817 Instance.getCache().refresh(url, support, true);
68686a37
NR
818 break;
819 } catch (IOException e) {
820 // no image with this ext
821 url = null;
822 }
823 }
824 }
825 } catch (MalformedURLException e) {
826 // Not an url
08fe2e33 827 }
08fe2e33 828 }
08fe2e33 829
68686a37
NR
830 // refresh the cached file
831 if (url != null) {
832 try {
333f0e7b 833 Instance.getCache().refresh(url, support, true);
68686a37
NR
834 } catch (IOException e) {
835 // woops, broken image
836 url = null;
837 }
08fe2e33
NR
838 }
839 }
840
841 return url;
842 }
843
373da363
NR
844 /**
845 * Open the input file that will be used through the support.
846 *
847 * @param source
848 * the source {@link URL}
849 *
850 * @return the {@link InputStream}
851 *
852 * @throws IOException
853 * in case of I/O error
854 */
855 protected InputStream openInput(URL source) throws IOException {
856 return Instance.getCache().open(source, this, false);
857 }
858
a4143cd7
NR
859 /**
860 * Reset the given {@link InputStream} and return it.
861 *
862 * @param in
863 * the {@link InputStream} to reset
864 *
865 * @return the same {@link InputStream} after reset
866 */
68686a37
NR
867 protected InputStream reset(InputStream in) {
868 try {
869 in.reset();
870 } catch (IOException e) {
871 }
872 return in;
873 }
874
08fe2e33
NR
875 /**
876 * Reset then return {@link BasicSupport#in}.
877 *
878 * @return {@link BasicSupport#in}
08fe2e33 879 */
68686a37
NR
880 protected InputStream getInput() {
881 return reset(in);
08fe2e33
NR
882 }
883
884 /**
885 * Fix the author name if it is prefixed with some "by" {@link String}.
886 *
887 * @param author
888 * the author with a possible prefix
889 *
890 * @return the author without prefixes
891 */
68686a37 892 protected String fixAuthor(String author) {
08fe2e33
NR
893 if (author != null) {
894 for (String suffix : new String[] { " ", ":" }) {
895 for (String byString : Instance.getConfig()
896 .getString(Config.BYS).split(",")) {
897 byString += suffix;
898 if (author.toUpperCase().startsWith(byString.toUpperCase())) {
899 author = author.substring(byString.length()).trim();
900 }
901 }
902 }
903
904 // Special case (without suffix):
905 if (author.startsWith("©")) {
906 author = author.substring(1);
907 }
908 }
909
910 return author;
911 }
912
913 /**
914 * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
915 * and requotify them (i.e., separate them into QUOTE paragraphs and other
916 * paragraphs (quotes or not)).
917 *
918 * @param para
a4143cd7 919 * the paragraph to requotify (not necessarily a quote)
08fe2e33
NR
920 *
921 * @return the correctly (or so we hope) quotified paragraphs
922 */
68e370a4 923 protected List<Paragraph> requotify(Paragraph para) {
08fe2e33
NR
924 List<Paragraph> newParas = new ArrayList<Paragraph>();
925
68686a37
NR
926 if (para.getType() == ParagraphType.QUOTE
927 && para.getContent().length() > 2) {
08fe2e33
NR
928 String line = para.getContent();
929 boolean singleQ = line.startsWith("" + openQuote);
930 boolean doubleQ = line.startsWith("" + openDoubleQuote);
931
b4dc6ab5
NR
932 // Do not try when more than one quote at a time
933 // (some stories are not easily readable if we do)
934 if (singleQ
935 && line.indexOf(closeQuote, 1) < line
936 .lastIndexOf(closeQuote)) {
937 newParas.add(para);
938 return newParas;
939 }
940 if (doubleQ
941 && line.indexOf(closeDoubleQuote, 1) < line
942 .lastIndexOf(closeDoubleQuote)) {
943 newParas.add(para);
944 return newParas;
945 }
946 //
947
08fe2e33
NR
948 if (!singleQ && !doubleQ) {
949 line = openDoubleQuote + line + closeDoubleQuote;
793f1071
NR
950 newParas.add(new Paragraph(ParagraphType.QUOTE, line, para
951 .getWords()));
08fe2e33 952 } else {
a6395bef 953 char open = singleQ ? openQuote : openDoubleQuote;
08fe2e33 954 char close = singleQ ? closeQuote : closeDoubleQuote;
a6395bef
NR
955
956 int posDot = -1;
957 boolean inQuote = false;
958 int i = 0;
959 for (char car : line.toCharArray()) {
960 if (car == open) {
961 inQuote = true;
962 } else if (car == close) {
963 inQuote = false;
964 } else if (car == '.' && !inQuote) {
965 posDot = i;
966 break;
967 }
968 i++;
08fe2e33
NR
969 }
970
971 if (posDot >= 0) {
972 String rest = line.substring(posDot + 1).trim();
973 line = line.substring(0, posDot + 1).trim();
793f1071
NR
974 long words = 1;
975 for (char car : line.toCharArray()) {
976 if (car == ' ') {
977 words++;
978 }
979 }
980 newParas.add(new Paragraph(ParagraphType.QUOTE, line, words));
68686a37
NR
981 if (!rest.isEmpty()) {
982 newParas.addAll(requotify(processPara(rest)));
983 }
08fe2e33
NR
984 } else {
985 newParas.add(para);
986 }
987 }
988 } else {
989 newParas.add(para);
990 }
991
992 return newParas;
993 }
994
995 /**
996 * Process a {@link Paragraph} from a raw line of text.
997 * <p>
998 * Will also fix quotes and HTML encoding if needed.
999 *
1000 * @param line
1001 * the raw line
1002 *
1003 * @return the processed {@link Paragraph}
1004 */
22848428 1005 protected Paragraph processPara(String line) {
08fe2e33
NR
1006 line = ifUnhtml(line).trim();
1007
1008 boolean space = true;
1009 boolean brk = true;
1010 boolean quote = false;
1011 boolean tentativeCloseQuote = false;
1012 char prev = '\0';
1013 int dashCount = 0;
793f1071 1014 long words = 1;
08fe2e33
NR
1015
1016 StringBuilder builder = new StringBuilder();
1017 for (char car : line.toCharArray()) {
1018 if (car != '-') {
1019 if (dashCount > 0) {
1020 // dash, ndash and mdash: - – —
1021 // currently: always use mdash
1022 builder.append(dashCount == 1 ? '-' : '—');
1023 }
1024 dashCount = 0;
1025 }
1026
1027 if (tentativeCloseQuote) {
1028 tentativeCloseQuote = false;
22848428 1029 if (Character.isLetterOrDigit(car)) {
08fe2e33
NR
1030 builder.append("'");
1031 } else {
22848428
NR
1032 // handle double-single quotes as double quotes
1033 if (prev == car) {
1034 builder.append(closeDoubleQuote);
1035 continue;
1036 } else {
1037 builder.append(closeQuote);
1038 }
08fe2e33
NR
1039 }
1040 }
1041
1042 switch (car) {
1043 case ' ': // note: unbreakable space
1044 case ' ':
1045 case '\t':
1046 case '\n': // just in case
1047 case '\r': // just in case
793f1071
NR
1048 if (builder.length() > 0
1049 && builder.charAt(builder.length() - 1) != ' ') {
1050 words++;
1051 }
08fe2e33
NR
1052 builder.append(' ');
1053 break;
1054
1055 case '\'':
1056 if (space || (brk && quote)) {
1057 quote = true;
22848428
NR
1058 // handle double-single quotes as double quotes
1059 if (prev == car) {
1060 builder.deleteCharAt(builder.length() - 1);
1061 builder.append(openDoubleQuote);
1062 } else {
1063 builder.append(openQuote);
1064 }
1065 } else if (prev == ' ' || prev == car) {
1066 // handle double-single quotes as double quotes
1067 if (prev == car) {
1068 builder.deleteCharAt(builder.length() - 1);
1069 builder.append(openDoubleQuote);
1070 } else {
1071 builder.append(openQuote);
1072 }
08fe2e33
NR
1073 } else {
1074 // it is a quote ("I'm off") or a 'quote' ("This
1075 // 'good' restaurant"...)
1076 tentativeCloseQuote = true;
1077 }
1078 break;
1079
1080 case '"':
1081 if (space || (brk && quote)) {
1082 quote = true;
1083 builder.append(openDoubleQuote);
1084 } else if (prev == ' ') {
1085 builder.append(openDoubleQuote);
1086 } else {
1087 builder.append(closeDoubleQuote);
1088 }
1089 break;
1090
1091 case '-':
1092 if (space) {
1093 quote = true;
1094 } else {
1095 dashCount++;
1096 }
1097 space = false;
1098 break;
1099
1100 case '*':
1101 case '~':
1102 case '/':
1103 case '\\':
1104 case '<':
1105 case '>':
1106 case '=':
1107 case '+':
1108 case '_':
1109 case '–':
1110 case '—':
1111 space = false;
1112 builder.append(car);
1113 break;
1114
1115 case '‘':
1116 case '`':
1117 case '‹':
1118 case '﹁':
1119 case '〈':
1120 case '「':
1121 if (space || (brk && quote)) {
1122 quote = true;
1123 builder.append(openQuote);
1124 } else {
22848428
NR
1125 // handle double-single quotes as double quotes
1126 if (prev == car) {
1127 builder.deleteCharAt(builder.length() - 1);
1128 builder.append(openDoubleQuote);
1129 } else {
1130 builder.append(openQuote);
1131 }
08fe2e33
NR
1132 }
1133 space = false;
1134 brk = false;
1135 break;
1136
1137 case '’':
1138 case '›':
1139 case '﹂':
1140 case '〉':
1141 case '」':
1142 space = false;
1143 brk = false;
22848428
NR
1144 // handle double-single quotes as double quotes
1145 if (prev == car) {
1146 builder.deleteCharAt(builder.length() - 1);
1147 builder.append(closeDoubleQuote);
1148 } else {
1149 builder.append(closeQuote);
1150 }
08fe2e33
NR
1151 break;
1152
1153 case '«':
1154 case '“':
1155 case '﹃':
1156 case '《':
1157 case '『':
1158 if (space || (brk && quote)) {
1159 quote = true;
1160 builder.append(openDoubleQuote);
1161 } else {
1162 builder.append(openDoubleQuote);
1163 }
1164 space = false;
1165 brk = false;
1166 break;
1167
1168 case '»':
1169 case '”':
1170 case '﹄':
1171 case '》':
1172 case '』':
1173 space = false;
1174 brk = false;
1175 builder.append(closeDoubleQuote);
1176 break;
1177
1178 default:
1179 space = false;
1180 brk = false;
1181 builder.append(car);
1182 break;
1183 }
1184
1185 prev = car;
1186 }
1187
1188 if (tentativeCloseQuote) {
1189 tentativeCloseQuote = false;
1190 builder.append(closeQuote);
1191 }
1192
1193 line = builder.toString().trim();
1194
1195 ParagraphType type = ParagraphType.NORMAL;
1196 if (space) {
1197 type = ParagraphType.BLANK;
1198 } else if (brk) {
1199 type = ParagraphType.BREAK;
1200 } else if (quote) {
1201 type = ParagraphType.QUOTE;
1202 }
1203
793f1071 1204 return new Paragraph(type, line, words);
08fe2e33
NR
1205 }
1206
1207 /**
a4143cd7 1208 * Remove the HTML from the input <b>if</b> {@link BasicSupport#isHtml()} is
08fe2e33
NR
1209 * true.
1210 *
1211 * @param input
1212 * the input
1213 *
1214 * @return the no html version if needed
1215 */
1216 private String ifUnhtml(String input) {
1217 if (isHtml() && input != null) {
1218 return StringUtils.unhtml(input);
1219 }
1220
1221 return input;
1222 }
1223
1224 /**
1225 * Return a {@link BasicSupport} implementation supporting the given
1226 * resource if possible.
1227 *
1228 * @param url
1229 * the story resource
1230 *
1231 * @return an implementation that supports it, or NULL
1232 */
1233 public static BasicSupport getSupport(URL url) {
1234 if (url == null) {
1235 return null;
1236 }
1237
1238 // TEXT and INFO_TEXT always support files (not URLs though)
1239 for (SupportType type : SupportType.values()) {
1240 if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1241 BasicSupport support = getSupport(type);
1242 if (support != null && support.supports(url)) {
1243 return support;
1244 }
1245 }
1246 }
1247
373da363
NR
1248 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
1249 SupportType.TEXT }) {
08fe2e33
NR
1250 BasicSupport support = getSupport(type);
1251 if (support != null && support.supports(url)) {
1252 return support;
1253 }
1254 }
1255
1256 return null;
1257 }
1258
1259 /**
1260 * Return a {@link BasicSupport} implementation supporting the given type.
1261 *
1262 * @param type
1263 * the type
1264 *
1265 * @return an implementation that supports it, or NULL
1266 */
1267 public static BasicSupport getSupport(SupportType type) {
1268 switch (type) {
1269 case EPUB:
1270 return new Epub().setType(type);
1271 case INFO_TEXT:
1272 return new InfoText().setType(type);
1273 case FIMFICTION:
1274 return new Fimfiction().setType(type);
1275 case FANFICTION:
1276 return new Fanfiction().setType(type);
1277 case TEXT:
1278 return new Text().setType(type);
1279 case MANGAFOX:
1280 return new MangaFox().setType(type);
1281 case E621:
1282 return new E621().setType(type);
a4143cd7
NR
1283 case YIFFSTAR:
1284 return new YiffStar().setType(type);
08fe2e33
NR
1285 case CBZ:
1286 return new Cbz().setType(type);
373da363
NR
1287 case HTML:
1288 return new Html().setType(type);
08fe2e33
NR
1289 }
1290
1291 return null;
1292 }
68686a37
NR
1293
1294 /**
1295 * Return the first line from the given input which correspond to the given
1296 * selectors.
1297 *
1298 * @param in
1299 * the input
1300 * @param needle
1301 * a string that must be found inside the target line (also
1302 * supports "^" at start to say "only if it starts with" the
1303 * needle)
1304 * @param relativeLine
1305 * the line to return based upon the target line position (-1 =
1306 * the line before, 0 = the target line...)
1307 *
1308 * @return the line
1309 */
1310 static String getLine(InputStream in, String needle, int relativeLine) {
1311 return getLine(in, needle, relativeLine, true);
1312 }
1313
1314 /**
1315 * Return a line from the given input which correspond to the given
1316 * selectors.
1317 *
1318 * @param in
1319 * the input
1320 * @param needle
1321 * a string that must be found inside the target line (also
1322 * supports "^" at start to say "only if it starts with" the
1323 * needle)
1324 * @param relativeLine
1325 * the line to return based upon the target line position (-1 =
1326 * the line before, 0 = the target line...)
1327 * @param first
1328 * takes the first result (as opposed to the last one, which will
1329 * also always spend the input)
1330 *
1331 * @return the line
1332 */
1333 static String getLine(InputStream in, String needle, int relativeLine,
1334 boolean first) {
1335 String rep = null;
1336
1337 try {
1338 in.reset();
1339 } catch (IOException e) {
1340 Instance.syserr(e);
1341 }
1342
1343 List<String> lines = new ArrayList<String>();
1344 @SuppressWarnings("resource")
1345 Scanner scan = new Scanner(in, "UTF-8");
1346 int index = -1;
1347 scan.useDelimiter("\\n");
1348 while (scan.hasNext()) {
1349 lines.add(scan.next());
1350
1351 if (index == -1) {
1352 if (needle.startsWith("^")) {
1353 if (lines.get(lines.size() - 1).startsWith(
1354 needle.substring(1))) {
1355 index = lines.size() - 1;
1356 }
1357
1358 } else {
1359 if (lines.get(lines.size() - 1).contains(needle)) {
1360 index = lines.size() - 1;
1361 }
1362 }
1363 }
1364
1365 if (index >= 0 && index + relativeLine < lines.size()) {
1366 rep = lines.get(index + relativeLine);
1367 if (first) {
1368 break;
1369 }
1370 }
1371 }
1372
1373 return rep;
1374 }
08fe2e33 1375}