d56e52d3aea48afa64b28792fba5a8e2fa42090e
[fanfix.git] / src / be / nikiroo / fanfix / supported / BasicSupport.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.awt.image.BufferedImage;
4 import java.io.BufferedReader;
5 import java.io.ByteArrayInputStream;
6 import java.io.File;
7 import java.io.IOException;
8 import java.io.InputStream;
9 import java.io.InputStreamReader;
10 import java.net.MalformedURLException;
11 import java.net.URL;
12 import java.util.ArrayList;
13 import java.util.Date;
14 import java.util.HashMap;
15 import java.util.List;
16 import java.util.Map;
17 import java.util.Map.Entry;
18 import java.util.Scanner;
19
20 import be.nikiroo.fanfix.Instance;
21 import be.nikiroo.fanfix.bundles.Config;
22 import be.nikiroo.fanfix.bundles.StringId;
23 import be.nikiroo.fanfix.data.Chapter;
24 import be.nikiroo.fanfix.data.MetaData;
25 import be.nikiroo.fanfix.data.Paragraph;
26 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
27 import be.nikiroo.fanfix.data.Story;
28 import be.nikiroo.utils.ImageUtils;
29 import be.nikiroo.utils.Progress;
30 import be.nikiroo.utils.StringUtils;
31
32 /**
 * This class is the base class used by the other support classes. It can be
 * used outside of this package, and has static methods that you can use to
 * get access to the correct support class.
36 * <p>
37 * It will be used with 'resources' (usually web pages or files).
38 *
39 * @author niki
40 */
41 public abstract class BasicSupport {
42 /**
43 * The supported input types for which we can get a {@link BasicSupport}
44 * object.
45 *
46 * @author niki
47 */
48 public enum SupportType {
49 /** EPUB files created with this program */
50 EPUB,
51 /** Pure text file with some rules */
52 TEXT,
53 /** TEXT but with associated .info file */
54 INFO_TEXT,
55 /** My Little Pony fanfictions */
56 FIMFICTION,
57 /** Fanfictions from a lot of different universes */
58 FANFICTION,
59 /** Website with lots of Mangas */
60 MANGAFOX,
61 /** Furry website with comics support */
62 E621,
63 /** Furry website with stories */
64 YIFFSTAR,
65 /** Comics and images groups, mostly but not only NSFW */
66 E_HENTAI,
67 /** CBZ files */
68 CBZ,
69 /** HTML files */
70 HTML;
71
72 /**
73 * A description of this support type (more information than the
74 * {@link BasicSupport#getSourceName()}).
75 *
76 * @return the description
77 */
78 public String getDesc() {
79 String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
80 this.name());
81
82 if (desc == null) {
83 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
84 }
85
86 return desc;
87 }
88
89 /**
90 * The name of this support type (a short version).
91 *
92 * @return the name
93 */
94 public String getSourceName() {
95 BasicSupport support = BasicSupport.getSupport(this);
96 if (support != null) {
97 return support.getSourceName();
98 }
99
100 return null;
101 }
102
103 @Override
104 public String toString() {
105 return super.toString().toLowerCase();
106 }
107
108 /**
109 * Call {@link SupportType#valueOf(String)} after conversion to upper
110 * case.
111 *
112 * @param typeName
113 * the possible type name
114 *
115 * @return NULL or the type
116 */
117 public static SupportType valueOfUC(String typeName) {
118 return SupportType.valueOf(typeName == null ? null : typeName
119 .toUpperCase());
120 }
121
122 /**
123 * Call {@link SupportType#valueOf(String)} after conversion to upper
124 * case but return NULL for NULL instead of raising exception.
125 *
126 * @param typeName
127 * the possible type name
128 *
129 * @return NULL or the type
130 */
131 public static SupportType valueOfNullOkUC(String typeName) {
132 if (typeName == null) {
133 return null;
134 }
135
136 return SupportType.valueOfUC(typeName);
137 }
138
139 /**
140 * Call {@link SupportType#valueOf(String)} after conversion to upper
141 * case but return NULL in case of error instead of raising an
142 * exception.
143 *
144 * @param typeName
145 * the possible type name
146 *
147 * @return NULL or the type
148 */
149 public static SupportType valueOfAllOkUC(String typeName) {
150 try {
151 return SupportType.valueOfUC(typeName);
152 } catch (Exception e) {
153 return null;
154 }
155 }
156 }
157
// The main input of the story being processed; assigned by processMeta()
// from openInput() and handed out (after a reset) by getInput().
private InputStream in;
// The support type, as set by setType().
private SupportType type;
private URL currentReferer; // with only one 'r', as in 'HTTP'...

// quote chars: the typographic quotes (taken from the translation bundle)
// that processPara() and requotify() write in place of the quote
// characters they recognise
private char openQuote = Instance.getTrans().getCharacter(
        StringId.OPEN_SINGLE_QUOTE);
private char closeQuote = Instance.getTrans().getCharacter(
        StringId.CLOSE_SINGLE_QUOTE);
private char openDoubleQuote = Instance.getTrans().getCharacter(
        StringId.OPEN_DOUBLE_QUOTE);
private char closeDoubleQuote = Instance.getTrans().getCharacter(
        StringId.CLOSE_DOUBLE_QUOTE);
171
/**
 * The name of this support class.
 * <p>
 * This is the value returned by {@link SupportType#getSourceName()} for
 * the type served by this support.
 *
 * @return the name
 */
protected abstract String getSourceName();
178
/**
 * Check if the given resource is supported by this {@link BasicSupport}.
 * <p>
 * Used by {@link BasicSupport#getSupport(URL)} to select the
 * implementation that will handle a resource.
 *
 * @param url
 *            the resource to check for
 *
 * @return TRUE if it is
 */
protected abstract boolean supports(URL url);
188
/**
 * Return TRUE if the support will return HTML encoded content values for
 * the chapters content.
 * <p>
 * When TRUE, the content is split on HTML paragraph/line-break tags and
 * each line is un-HTML-ised before being processed; when FALSE, the
 * content is processed as plain text, line by line.
 *
 * @return TRUE for HTML
 */
protected abstract boolean isHtml();
196
/**
 * Return the {@link MetaData} of this story.
 * <p>
 * The result is used as is by
 * {@link BasicSupport#processMeta(URL, boolean, boolean, Progress)}, which
 * dereferences it immediately: implementations must not return NULL.
 *
 * @param source
 *            the source of the story
 * @param in
 *            the input (the main resource)
 *
 * @return the associated {@link MetaData}
 *
 * @throws IOException
 *             in case of I/O error
 */
protected abstract MetaData getMeta(URL source, InputStream in)
        throws IOException;
212
/**
 * Return the story description.
 * <p>
 * Only called when the description is requested; the returned text is
 * then converted into the "resume" chapter (chapter number 0) of the
 * story.
 *
 * @param source
 *            the source of the story
 * @param in
 *            the input (the main resource)
 *
 * @return the description
 *
 * @throws IOException
 *             in case of I/O error
 */
protected abstract String getDesc(URL source, InputStream in)
        throws IOException;
228
/**
 * Return the list of chapters (name and resource).
 * <p>
 * Each entry maps the chapter name (key) to the {@link URL} of the
 * chapter resource (value); the chapters are processed in list order and
 * numbered from 1.
 *
 * @param source
 *            the source of the story
 * @param in
 *            the input (the main resource)
 * @param pg
 *            the optional progress reporter
 *
 * @return the chapters (NULL is accepted and means "no chapter")
 *
 * @throws IOException
 *             in case of I/O error
 */
protected abstract List<Entry<String, URL>> getChapters(URL source,
        InputStream in, Progress pg) throws IOException;
246
/**
 * Return the content of the chapter (possibly HTML encoded, if
 * {@link BasicSupport#isHtml()} is TRUE).
 *
 * @param source
 *            the source of the story
 * @param in
 *            the input; note that {@link BasicSupport#process(URL, Progress)}
 *            passes the stream opened on the chapter resource here, not
 *            the main resource
 * @param number
 *            the chapter number (the first chapter is number 1)
 * @param pg
 *            the optional progress reporter
 *
 * @return the content
 *
 * @throws IOException
 *             in case of I/O error
 */
protected abstract String getChapterContent(URL source, InputStream in,
        int number, Progress pg) throws IOException;
267
/**
 * Log into the support (can be a no-op depending upon the support).
 * <p>
 * Called at the start of the metadata processing, before the main
 * resource is opened. This default implementation does nothing.
 *
 * @throws IOException
 *             in case of I/O error
 */
public void login() throws IOException {
    // nothing to do by default

}
277
278 /**
279 * Return the list of cookies (values included) that must be used to
280 * correctly fetch the resources.
281 * <p>
282 * You are expected to call the super method implementation if you override
283 * it.
284 *
285 * @return the cookies
286 *
287 * @throws IOException
288 * in case of I/O error
289 */
290 public Map<String, String> getCookies() throws IOException {
291 return new HashMap<String, String>();
292 }
293
/**
 * Return the canonical form of the main {@link URL}.
 * <p>
 * This default implementation returns the given {@link URL} unchanged.
 *
 * @param source
 *            the source {@link URL}
 *
 * @return the canonical form of this {@link URL}
 *
 * @throws IOException
 *             in case of I/O error
 */
public URL getCanonicalUrl(URL source) throws IOException {
    return source;
}
308
309 /**
310 * Process the given story resource into a partially filled {@link Story}
311 * object containing the name and metadata, except for the description.
312 *
313 * @param url
314 * the story resource
315 *
316 * @return the {@link Story}
317 *
318 * @throws IOException
319 * in case of I/O error
320 */
321 public Story processMeta(URL url) throws IOException {
322 return processMeta(url, true, false, null);
323 }
324
325 /**
326 * Process the given story resource into a partially filled {@link Story}
327 * object containing the name and metadata.
328 *
329 * @param url
330 * the story resource
331 * @param close
332 * close "this" and "in" when done
333 * @param getDesc
334 * retrieve the description of the story, or not
335 * @param pg
336 * the optional progress reporter
337 *
338 * @return the {@link Story}
339 *
340 * @throws IOException
341 * in case of I/O error
342 */
343 protected Story processMeta(URL url, boolean close, boolean getDesc,
344 Progress pg) throws IOException {
345 if (pg == null) {
346 pg = new Progress();
347 } else {
348 pg.setMinMax(0, 100);
349 }
350
351 login();
352 pg.setProgress(10);
353
354 url = getCanonicalUrl(url);
355
356 setCurrentReferer(url);
357
358 in = openInput(url);
359 if (in == null) {
360 return null;
361 }
362
363 try {
364 preprocess(url, getInput());
365 pg.setProgress(30);
366
367 Story story = new Story();
368 MetaData meta = getMeta(url, getInput());
369 if (meta.getCreationDate() == null
370 || meta.getCreationDate().isEmpty()) {
371 meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
372 }
373 story.setMeta(meta);
374
375 pg.setProgress(50);
376
377 if (meta.getCover() == null) {
378 meta.setCover(getDefaultCover(meta.getSubject()));
379 }
380
381 pg.setProgress(60);
382
383 if (getDesc) {
384 String descChapterName = Instance.getTrans().getString(
385 StringId.DESCRIPTION);
386 story.getMeta().setResume(
387 makeChapter(url, 0, descChapterName,
388 getDesc(url, getInput()), null));
389 }
390
391 pg.setProgress(100);
392 return story;
393 } finally {
394 if (close) {
395 try {
396 close();
397 } catch (IOException e) {
398 Instance.syserr(e);
399 }
400
401 if (in != null) {
402 in.close();
403 }
404 }
405
406 setCurrentReferer(null);
407 }
408 }
409
410 /**
411 * Process the given story resource into a fully filled {@link Story}
412 * object.
413 *
414 * @param url
415 * the story resource
416 * @param pg
417 * the optional progress reporter
418 *
419 * @return the {@link Story}
420 *
421 * @throws IOException
422 * in case of I/O error
423 */
424 public Story process(URL url, Progress pg) throws IOException {
425 if (pg == null) {
426 pg = new Progress();
427 } else {
428 pg.setMinMax(0, 100);
429 }
430
431 url = getCanonicalUrl(url);
432 pg.setProgress(1);
433 try {
434 Progress pgMeta = new Progress();
435 pg.addProgress(pgMeta, 10);
436 Story story = processMeta(url, false, true, pgMeta);
437 if (!pgMeta.isDone()) {
438 pgMeta.setProgress(pgMeta.getMax()); // 10%
439 }
440
441 if (story == null) {
442 pg.setProgress(90);
443 return null;
444 }
445
446 pg.setName("Retrieving " + story.getMeta().getTitle());
447
448 setCurrentReferer(url);
449
450 Progress pgGetChapters = new Progress();
451 pg.addProgress(pgGetChapters, 10);
452 story.setChapters(new ArrayList<Chapter>());
453 List<Entry<String, URL>> chapters = getChapters(url, getInput(),
454 pgGetChapters);
455 if (!pgGetChapters.isDone()) {
456 pgGetChapters.setProgress(pgGetChapters.getMax()); // 20%
457 }
458
459 if (chapters != null) {
460 Progress pgChaps = new Progress("Extracting chapters", 0,
461 chapters.size() * 300);
462 pg.addProgress(pgChaps, 80);
463
464 long words = 0;
465 int i = 1;
466 for (Entry<String, URL> chap : chapters) {
467 pgChaps.setName("Extracting chapter " + i);
468 setCurrentReferer(chap.getValue());
469 InputStream chapIn = Instance.getCache().open(
470 chap.getValue(), this, true);
471 pgChaps.setProgress(i * 100);
472 try {
473 Progress pgGetChapterContent = new Progress();
474 Progress pgMakeChapter = new Progress();
475 pgChaps.addProgress(pgGetChapterContent, 100);
476 pgChaps.addProgress(pgMakeChapter, 100);
477
478 String content = getChapterContent(url, chapIn, i,
479 pgGetChapterContent);
480 if (!pgGetChapterContent.isDone()) {
481 pgGetChapterContent.setProgress(pgGetChapterContent
482 .getMax());
483 }
484
485 Chapter cc = makeChapter(url, i, chap.getKey(),
486 content, pgMakeChapter);
487 if (!pgMakeChapter.isDone()) {
488 pgMakeChapter.setProgress(pgMakeChapter.getMax());
489 }
490
491 words += cc.getWords();
492 story.getChapters().add(cc);
493 if (story.getMeta() != null) {
494 story.getMeta().setWords(words);
495 }
496 } finally {
497 chapIn.close();
498 }
499
500 i++;
501 }
502
503 pgChaps.setName("Extracting chapters");
504 } else {
505 pg.setProgress(80);
506 }
507
508 return story;
509
510 } finally {
511 try {
512 close();
513 } catch (IOException e) {
514 Instance.syserr(e);
515 }
516
517 if (in != null) {
518 in.close();
519 }
520
521 setCurrentReferer(null);
522 }
523 }
524
/**
 * The support type, as set by {@link BasicSupport#setType(SupportType)}.
 *
 * @return the type (NULL if it was never set)
 */
public SupportType getType() {
    return type;
}
533
/**
 * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
 * the current {@link URL} we work on.
 *
 * @return the referer (NULL when no resource is being processed)
 */
public URL getCurrentReferer() {
    return currentReferer;
}
543
/**
 * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
 * the current {@link URL} we work on.
 *
 * @param currentReferer
 *            the new referer (NULL to clear it)
 */
protected void setCurrentReferer(URL currentReferer) {
    this.currentReferer = currentReferer;
}
554
/**
 * The support type.
 *
 * @param type
 *            the new type
 *
 * @return this (so the call can be chained)
 */
protected BasicSupport setType(SupportType type) {
    this.type = type;
    return this;
}
567
/**
 * Prepare the support if needed before processing.
 * <p>
 * Called once the main input is opened and before the metadata are read.
 * This default implementation does nothing.
 *
 * @param source
 *            the source of the story
 * @param in
 *            the input (the main resource)
 *
 * @throws IOException
 *             on I/O error
 */
@SuppressWarnings("unused")
protected void preprocess(URL source, InputStream in) throws IOException {
    // nothing to prepare by default
}
582
/**
 * Now that we have processed the {@link Story}, close the resources if any.
 * <p>
 * This default implementation does nothing; the main input itself is
 * closed by the processing methods, not here.
 *
 * @throws IOException
 *             on I/O error
 */
protected void close() throws IOException {
    // no extra resource to release by default
}
591
592 /**
593 * Create a {@link Chapter} object from the given information, formatting
594 * the content as it should be.
595 *
596 * @param source
597 * the source of the story
598 * @param number
599 * the chapter number
600 * @param name
601 * the chapter name
602 * @param content
603 * the chapter content
604 * @param pg
605 * the optional progress reporter
606 *
607 * @return the {@link Chapter}
608 *
609 * @throws IOException
610 * in case of I/O error
611 */
612 protected Chapter makeChapter(URL source, int number, String name,
613 String content, Progress pg) throws IOException {
614 // Chapter name: process it correctly, then remove the possible
615 // redundant "Chapter x: " in front of it, or "-" (as in
616 // "Chapter 5: - Fun!" after the ": " was automatically added)
617 String chapterName = processPara(name).getContent().trim();
618 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
619 .split(",")) {
620 String chapterWord = Instance.getConfig().getStringX(
621 Config.CHAPTER, lang);
622 if (chapterName.startsWith(chapterWord)) {
623 chapterName = chapterName.substring(chapterWord.length())
624 .trim();
625 break;
626 }
627 }
628
629 if (chapterName.startsWith(Integer.toString(number))) {
630 chapterName = chapterName.substring(
631 Integer.toString(number).length()).trim();
632 }
633
634 while (chapterName.startsWith(":") || chapterName.startsWith("-")) {
635 chapterName = chapterName.substring(1).trim();
636 }
637 //
638
639 Chapter chap = new Chapter(number, chapterName);
640
641 if (content != null) {
642 List<Paragraph> paras = makeParagraphs(source, content, pg);
643 long words = 0;
644 for (Paragraph para : paras) {
645 words += para.getWords();
646 }
647 chap.setParagraphs(paras);
648 chap.setWords(words);
649 }
650
651 return chap;
652
653 }
654
655 /**
656 * Convert the given content into {@link Paragraph}s.
657 *
658 * @param source
659 * the source URL of the story
660 * @param content
661 * the textual content
662 * @param pg
663 * the optional progress reporter
664 *
665 * @return the {@link Paragraph}s
666 *
667 * @throws IOException
668 * in case of I/O error
669 */
670 protected List<Paragraph> makeParagraphs(URL source, String content,
671 Progress pg) throws IOException {
672 if (pg == null) {
673 pg = new Progress();
674 }
675
676 if (isHtml()) {
677 // Special <HR> processing:
678 content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
679 "<br/>* * *<br/>");
680 }
681
682 List<Paragraph> paras = new ArrayList<Paragraph>();
683
684 if (content != null && !content.trim().isEmpty()) {
685 if (isHtml()) {
686 String[] tab = content.split("(<p>|</p>|<br>|<br/>)");
687 pg.setMinMax(0, tab.length);
688 int i = 1;
689 for (String line : tab) {
690 if (line.startsWith("[") && line.endsWith("]")) {
691 pg.setName("Extracting image " + i);
692 }
693 paras.add(makeParagraph(source, line.trim()));
694 pg.setProgress(i++);
695 }
696 pg.setName(null);
697 } else {
698 List<String> lines = new ArrayList<String>();
699 BufferedReader buff = null;
700 try {
701 buff = new BufferedReader(
702 new InputStreamReader(new ByteArrayInputStream(
703 content.getBytes("UTF-8")), "UTF-8"));
704 for (String line = buff.readLine(); line != null; line = buff
705 .readLine()) {
706 lines.add(line.trim());
707 }
708 } finally {
709 if (buff != null) {
710 buff.close();
711 }
712 }
713
714 pg.setMinMax(0, lines.size());
715 int i = 0;
716 for (String line : lines) {
717 if (line.startsWith("[") && line.endsWith("]")) {
718 pg.setName("Extracting image " + i);
719 }
720 paras.add(makeParagraph(source, line));
721 pg.setProgress(i++);
722 }
723 pg.setName(null);
724 }
725
726 // Check quotes for "bad" format
727 List<Paragraph> newParas = new ArrayList<Paragraph>();
728 for (Paragraph para : paras) {
729 newParas.addAll(requotify(para));
730 }
731 paras = newParas;
732
733 // Remove double blanks/brks
734 fixBlanksBreaks(paras);
735 }
736
737 return paras;
738 }
739
740 /**
741 * Convert the given line into a single {@link Paragraph}.
742 *
743 * @param source
744 * the source URL of the story
745 * @param line
746 * the textual content of the paragraph
747 *
748 * @return the {@link Paragraph}
749 */
750 private Paragraph makeParagraph(URL source, String line) {
751 URL image = null;
752 if (line.startsWith("[") && line.endsWith("]")) {
753 image = getImageUrl(this, source,
754 line.substring(1, line.length() - 1).trim());
755 }
756
757 if (image != null) {
758 return new Paragraph(image);
759 }
760
761 return processPara(line);
762 }
763
764 /**
765 * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
766 * those {@link Paragraph}s.
767 * <p>
768 * The resulting list will not contain a starting or trailing blank/break
769 * nor 2 blanks or breaks following each other.
770 *
771 * @param paras
772 * the list of {@link Paragraph}s to fix
773 */
774 protected void fixBlanksBreaks(List<Paragraph> paras) {
775 boolean space = false;
776 boolean brk = true;
777 for (int i = 0; i < paras.size(); i++) {
778 Paragraph para = paras.get(i);
779 boolean thisSpace = para.getType() == ParagraphType.BLANK;
780 boolean thisBrk = para.getType() == ParagraphType.BREAK;
781
782 if (i > 0 && space && thisBrk) {
783 paras.remove(i - 1);
784 i--;
785 } else if ((space || brk) && (thisSpace || thisBrk)) {
786 paras.remove(i);
787 i--;
788 }
789
790 space = thisSpace;
791 brk = thisBrk;
792 }
793
794 // Remove blank/brk at start
795 if (paras.size() > 0
796 && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
797 0).getType() == ParagraphType.BREAK)) {
798 paras.remove(0);
799 }
800
801 // Remove blank/brk at end
802 int last = paras.size() - 1;
803 if (paras.size() > 0
804 && (paras.get(last).getType() == ParagraphType.BLANK || paras
805 .get(last).getType() == ParagraphType.BREAK)) {
806 paras.remove(last);
807 }
808 }
809
810 /**
811 * Get the default cover related to this subject (see <tt>.info</tt> files).
812 *
813 * @param subject
814 * the subject
815 *
816 * @return the cover if any, or NULL
817 */
818 static BufferedImage getDefaultCover(String subject) {
819 if (subject != null && !subject.isEmpty()
820 && Instance.getCoverDir() != null) {
821 try {
822 File fileCover = new File(Instance.getCoverDir(), subject);
823 return getImage(null, fileCover.toURI().toURL(), subject);
824 } catch (MalformedURLException e) {
825 }
826 }
827
828 return null;
829 }
830
831 /**
832 * Return the list of supported image extensions.
833 *
834 * @param emptyAllowed
835 * TRUE to allow an empty extension on first place, which can be
836 * used when you may already have an extension in your input but
837 * are not sure about it
838 *
839 * @return the extensions
840 */
841 static String[] getImageExt(boolean emptyAllowed) {
842 if (emptyAllowed) {
843 return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
844 }
845
846 return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
847 }
848
849 /**
850 * Check if the given resource can be a local image or a remote image, then
851 * refresh the cache with it if it is.
852 *
853 * @param source
854 * the story source
855 * @param line
856 * the resource to check
857 *
858 * @return the image if found, or NULL
859 *
860 */
861 static BufferedImage getImage(BasicSupport support, URL source, String line) {
862 URL url = getImageUrl(support, source, line);
863 if (url != null) {
864 InputStream in = null;
865 try {
866 in = Instance.getCache().open(url, getSupport(url), true);
867 return ImageUtils.fromStream(in);
868 } catch (IOException e) {
869 } finally {
870 if (in != null) {
871 try {
872 in.close();
873 } catch (IOException e) {
874 }
875 }
876 }
877 }
878
879 return null;
880 }
881
882 /**
883 * Check if the given resource can be a local image or a remote image, then
884 * refresh the cache with it if it is.
885 *
886 * @param source
887 * the story source
888 * @param line
889 * the resource to check
890 *
891 * @return the image URL if found, or NULL
892 *
893 */
894 static URL getImageUrl(BasicSupport support, URL source, String line) {
895 URL url = null;
896
897 if (line != null) {
898 // try for files
899 if (source != null) {
900 try {
901
902 String relPath = null;
903 String absPath = null;
904 try {
905 String path = new File(source.getFile()).getParent();
906 relPath = new File(new File(path), line.trim())
907 .getAbsolutePath();
908 } catch (Exception e) {
909 // Cannot be converted to path (one possibility to take
910 // into account: absolute path on Windows)
911 }
912 try {
913 absPath = new File(line.trim()).getAbsolutePath();
914 } catch (Exception e) {
915 // Cannot be converted to path (at all)
916 }
917
918 for (String ext : getImageExt(true)) {
919 if (absPath != null && new File(absPath + ext).exists()) {
920 url = new File(absPath + ext).toURI().toURL();
921 } else if (relPath != null
922 && new File(relPath + ext).exists()) {
923 url = new File(relPath + ext).toURI().toURL();
924 }
925 }
926 } catch (Exception e) {
927 // Should not happen since we control the correct arguments
928 }
929 }
930
931 if (url == null) {
932 // try for URLs
933 try {
934 for (String ext : getImageExt(true)) {
935 if (Instance.getCache().check(new URL(line + ext))) {
936 url = new URL(line + ext);
937 break;
938 }
939 }
940
941 // try out of cache
942 if (url == null) {
943 for (String ext : getImageExt(true)) {
944 try {
945 url = new URL(line + ext);
946 Instance.getCache().refresh(url, support, true);
947 break;
948 } catch (IOException e) {
949 // no image with this ext
950 url = null;
951 }
952 }
953 }
954 } catch (MalformedURLException e) {
955 // Not an url
956 }
957 }
958
959 // refresh the cached file
960 if (url != null) {
961 try {
962 Instance.getCache().refresh(url, support, true);
963 } catch (IOException e) {
964 // woops, broken image
965 url = null;
966 }
967 }
968 }
969
970 return url;
971 }
972
/**
 * Open the input file that will be used through the support.
 * <p>
 * This default implementation opens the source through the cache, with
 * this support as context.
 *
 * @param source
 *            the source {@link URL}
 *
 * @return the {@link InputStream}
 *
 * @throws IOException
 *             in case of I/O error
 */
protected InputStream openInput(URL source) throws IOException {
    return Instance.getCache().open(source, this, false);
}
987
988 /**
989 * Reset the given {@link InputStream} and return it.
990 *
991 * @param in
992 * the {@link InputStream} to reset
993 *
994 * @return the same {@link InputStream} after reset
995 */
996 protected InputStream reset(InputStream in) {
997 try {
998 in.reset();
999 } catch (IOException e) {
1000 }
1001 return in;
1002 }
1003
/**
 * Reset then return {@link BasicSupport#in}.
 * <p>
 * This is how the main input is handed to each processing step, so that
 * every step can read it from the reset position.
 *
 * @return {@link BasicSupport#in}
 */
protected InputStream getInput() {
    return reset(in);
}
1012
1013 /**
1014 * Fix the author name if it is prefixed with some "by" {@link String}.
1015 *
1016 * @param author
1017 * the author with a possible prefix
1018 *
1019 * @return the author without prefixes
1020 */
1021 protected String fixAuthor(String author) {
1022 if (author != null) {
1023 for (String suffix : new String[] { " ", ":" }) {
1024 for (String byString : Instance.getConfig()
1025 .getString(Config.BYS).split(",")) {
1026 byString += suffix;
1027 if (author.toUpperCase().startsWith(byString.toUpperCase())) {
1028 author = author.substring(byString.length()).trim();
1029 }
1030 }
1031 }
1032
1033 // Special case (without suffix):
1034 if (author.startsWith("©")) {
1035 author = author.substring(1);
1036 }
1037 }
1038
1039 return author;
1040 }
1041
1042 /**
1043 * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
1044 * and requotify them (i.e., separate them into QUOTE paragraphs and other
1045 * paragraphs (quotes or not)).
1046 *
1047 * @param para
1048 * the paragraph to requotify (not necessarily a quote)
1049 *
1050 * @return the correctly (or so we hope) quotified paragraphs
1051 */
1052 protected List<Paragraph> requotify(Paragraph para) {
1053 List<Paragraph> newParas = new ArrayList<Paragraph>();
1054
1055 if (para.getType() == ParagraphType.QUOTE
1056 && para.getContent().length() > 2) {
1057 String line = para.getContent();
1058 boolean singleQ = line.startsWith("" + openQuote);
1059 boolean doubleQ = line.startsWith("" + openDoubleQuote);
1060
1061 // Do not try when more than one quote at a time
1062 // (some stories are not easily readable if we do)
1063 if (singleQ
1064 && line.indexOf(closeQuote, 1) < line
1065 .lastIndexOf(closeQuote)) {
1066 newParas.add(para);
1067 return newParas;
1068 }
1069 if (doubleQ
1070 && line.indexOf(closeDoubleQuote, 1) < line
1071 .lastIndexOf(closeDoubleQuote)) {
1072 newParas.add(para);
1073 return newParas;
1074 }
1075 //
1076
1077 if (!singleQ && !doubleQ) {
1078 line = openDoubleQuote + line + closeDoubleQuote;
1079 newParas.add(new Paragraph(ParagraphType.QUOTE, line, para
1080 .getWords()));
1081 } else {
1082 char open = singleQ ? openQuote : openDoubleQuote;
1083 char close = singleQ ? closeQuote : closeDoubleQuote;
1084
1085 int posDot = -1;
1086 boolean inQuote = false;
1087 int i = 0;
1088 for (char car : line.toCharArray()) {
1089 if (car == open) {
1090 inQuote = true;
1091 } else if (car == close) {
1092 inQuote = false;
1093 } else if (car == '.' && !inQuote) {
1094 posDot = i;
1095 break;
1096 }
1097 i++;
1098 }
1099
1100 if (posDot >= 0) {
1101 String rest = line.substring(posDot + 1).trim();
1102 line = line.substring(0, posDot + 1).trim();
1103 long words = 1;
1104 for (char car : line.toCharArray()) {
1105 if (car == ' ') {
1106 words++;
1107 }
1108 }
1109 newParas.add(new Paragraph(ParagraphType.QUOTE, line, words));
1110 if (!rest.isEmpty()) {
1111 newParas.addAll(requotify(processPara(rest)));
1112 }
1113 } else {
1114 newParas.add(para);
1115 }
1116 }
1117 } else {
1118 newParas.add(para);
1119 }
1120
1121 return newParas;
1122 }
1123
/**
 * Process a {@link Paragraph} from a raw line of text.
 * <p>
 * Will also fix quotes and HTML encoding if needed.
 * <p>
 * The line is scanned character by character:
 * <ul>
 * <li>all the recognised quote characters are replaced by the configured
 * typographic quotes (two identical single quotes in a row count as one
 * double quote);</li>
 * <li>2 or more dashes in a row are replaced by one mdash;</li>
 * <li>all the blank characters are replaced by a normal space.</li>
 * </ul>
 * The type of the result is BLANK if only blanks and ASCII quotes were
 * seen, BREAK if nothing but those and separator symbols were seen, QUOTE
 * if the line starts with a quote character or a dash, NORMAL otherwise.
 * <p>
 * The line must not be NULL.
 *
 * @param line
 *            the raw line
 *
 * @return the processed {@link Paragraph}
 */
protected Paragraph processPara(String line) {
    line = ifUnhtml(line).trim();

    // space: nothing but blanks/ASCII quotes seen so far
    boolean space = true;
    // brk: nothing but blanks/ASCII quotes/separators seen so far
    boolean brk = true;
    // quote: the line was identified as a quote
    boolean quote = false;
    // an ASCII single quote is pending: its meaning (apostrophe or
    // closing quote) depends upon the next character
    boolean tentativeCloseQuote = false;
    char prev = '\0';
    // number of consecutive dashes not written yet
    int dashCount = 0;
    long words = 1;

    StringBuilder builder = new StringBuilder();
    for (char car : line.toCharArray()) {
        if (car != '-') {
            // End of a run of dashes: write it now
            if (dashCount > 0) {
                // dash, ndash and mdash: - – —
                // currently: always use mdash
                builder.append(dashCount == 1 ? '-' : '—');
            }
            dashCount = 0;
        }

        if (tentativeCloseQuote) {
            tentativeCloseQuote = false;
            if (Character.isLetterOrDigit(car)) {
                // followed by a letter/digit: it was an apostrophe
                builder.append("'");
            } else {
                // handle double-single quotes as double quotes
                if (prev == car) {
                    builder.append(closeDoubleQuote);
                    // the second quote is consumed here ('prev' is left
                    // as it is)
                    continue;
                }

                builder.append(closeQuote);
            }
        }

        switch (car) {
        case ' ': // note: unbreakable space
        case ' ':
        case '\t':
        case '\n': // just in case
        case '\r': // just in case
            // A new word starts after each first blank of a run
            if (builder.length() > 0
                    && builder.charAt(builder.length() - 1) != ' ') {
                words++;
            }
            builder.append(' ');
            break;

        case '\'':
            if (space || (brk && quote)) {
                quote = true;
                // handle double-single quotes as double quotes
                if (prev == car) {
                    builder.deleteCharAt(builder.length() - 1);
                    builder.append(openDoubleQuote);
                } else {
                    builder.append(openQuote);
                }
            } else if (prev == ' ' || prev == car) {
                // handle double-single quotes as double quotes
                if (prev == car) {
                    builder.deleteCharAt(builder.length() - 1);
                    builder.append(openDoubleQuote);
                } else {
                    builder.append(openQuote);
                }
            } else {
                // it is a quote ("I'm off") or a 'quote' ("This
                // 'good' restaurant"...)
                tentativeCloseQuote = true;
            }
            break;

        case '"':
            if (space || (brk && quote)) {
                quote = true;
                builder.append(openDoubleQuote);
            } else if (prev == ' ') {
                builder.append(openDoubleQuote);
            } else {
                builder.append(closeDoubleQuote);
            }
            break;

        case '-':
            if (space) {
                // Leading dash: marks a quote, and is not written
                quote = true;
            } else {
                // Written when the run of dashes ends (see the top of
                // the loop).
                // NOTE(review): a run of dashes that ends the line is
                // never written, nothing flushes it after the loop
                dashCount++;
            }
            space = false;
            break;

        case '*':
        case '~':
        case '/':
        case '\\':
        case '<':
        case '>':
        case '=':
        case '+':
        case '_':
        case '–':
        case '—':
            // Separators: not a blank line any more, but can still be a
            // break
            space = false;
            builder.append(car);
            break;

        case '‘':
        case '`':
        case '‹':
        case '﹁':
        case '〈':
        case '「':
            if (space || (brk && quote)) {
                quote = true;
                builder.append(openQuote);
            } else {
                // handle double-single quotes as double quotes
                if (prev == car) {
                    builder.deleteCharAt(builder.length() - 1);
                    builder.append(openDoubleQuote);
                } else {
                    builder.append(openQuote);
                }
            }
            space = false;
            brk = false;
            break;

        case '’':
        case '›':
        case '﹂':
        case '〉':
        case '」':
            space = false;
            brk = false;
            // handle double-single quotes as double quotes
            if (prev == car) {
                builder.deleteCharAt(builder.length() - 1);
                builder.append(closeDoubleQuote);
            } else {
                builder.append(closeQuote);
            }
            break;

        case '«':
        case '“':
        case '﹃':
        case '《':
        case '『':
            if (space || (brk && quote)) {
                quote = true;
                builder.append(openDoubleQuote);
            } else {
                builder.append(openDoubleQuote);
            }
            space = false;
            brk = false;
            break;

        case '»':
        case '”':
        case '﹄':
        case '》':
        case '』':
            space = false;
            brk = false;
            builder.append(closeDoubleQuote);
            break;

        default:
            // Actual content
            space = false;
            brk = false;
            builder.append(car);
            break;
        }

        prev = car;
    }

    // A single quote pending at the end of the line closes a quote
    if (tentativeCloseQuote) {
        tentativeCloseQuote = false;
        builder.append(closeQuote);
    }

    line = builder.toString().trim();

    ParagraphType type = ParagraphType.NORMAL;
    if (space) {
        type = ParagraphType.BLANK;
    } else if (brk) {
        type = ParagraphType.BREAK;
    } else if (quote) {
        type = ParagraphType.QUOTE;
    }

    return new Paragraph(type, line, words);
}
1335
1336 /**
1337 * Remove the HTML from the input <b>if</b> {@link BasicSupport#isHtml()} is
1338 * true.
1339 *
1340 * @param input
1341 * the input
1342 *
1343 * @return the no html version if needed
1344 */
1345 private String ifUnhtml(String input) {
1346 if (isHtml() && input != null) {
1347 return StringUtils.unhtml(input);
1348 }
1349
1350 return input;
1351 }
1352
1353 /**
1354 * Return a {@link BasicSupport} implementation supporting the given
1355 * resource if possible.
1356 *
1357 * @param url
1358 * the story resource
1359 *
1360 * @return an implementation that supports it, or NULL
1361 */
1362 public static BasicSupport getSupport(URL url) {
1363 if (url == null) {
1364 return null;
1365 }
1366
1367 // TEXT and INFO_TEXT always support files (not URLs though)
1368 for (SupportType type : SupportType.values()) {
1369 if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1370 BasicSupport support = getSupport(type);
1371 if (support != null && support.supports(url)) {
1372 return support;
1373 }
1374 }
1375 }
1376
1377 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
1378 SupportType.TEXT }) {
1379 BasicSupport support = getSupport(type);
1380 if (support != null && support.supports(url)) {
1381 return support;
1382 }
1383 }
1384
1385 return null;
1386 }
1387
1388 /**
1389 * Return a {@link BasicSupport} implementation supporting the given type.
1390 *
1391 * @param type
1392 * the type
1393 *
1394 * @return an implementation that supports it, or NULL
1395 */
1396 public static BasicSupport getSupport(SupportType type) {
1397 switch (type) {
1398 case EPUB:
1399 return new Epub().setType(type);
1400 case INFO_TEXT:
1401 return new InfoText().setType(type);
1402 case FIMFICTION:
1403 return new Fimfiction().setType(type);
1404 case FANFICTION:
1405 return new Fanfiction().setType(type);
1406 case TEXT:
1407 return new Text().setType(type);
1408 case MANGAFOX:
1409 return new MangaFox().setType(type);
1410 case E621:
1411 return new E621().setType(type);
1412 case YIFFSTAR:
1413 return new YiffStar().setType(type);
1414 case E_HENTAI:
1415 return new EHentai().setType(type);
1416 case CBZ:
1417 return new Cbz().setType(type);
1418 case HTML:
1419 return new Html().setType(type);
1420 }
1421
1422 return null;
1423 }
1424
1425 /**
1426 * Return the first line from the given input which correspond to the given
1427 * selectors.
1428 *
1429 * @param in
1430 * the input
1431 * @param needle
1432 * a string that must be found inside the target line (also
1433 * supports "^" at start to say "only if it starts with" the
1434 * needle)
1435 * @param relativeLine
1436 * the line to return based upon the target line position (-1 =
1437 * the line before, 0 = the target line...)
1438 *
1439 * @return the line
1440 */
1441 static String getLine(InputStream in, String needle, int relativeLine) {
1442 return getLine(in, needle, relativeLine, true);
1443 }
1444
1445 /**
1446 * Return a line from the given input which correspond to the given
1447 * selectors.
1448 *
1449 * @param in
1450 * the input
1451 * @param needle
1452 * a string that must be found inside the target line (also
1453 * supports "^" at start to say "only if it starts with" the
1454 * needle)
1455 * @param relativeLine
1456 * the line to return based upon the target line position (-1 =
1457 * the line before, 0 = the target line...)
1458 * @param first
1459 * takes the first result (as opposed to the last one, which will
1460 * also always spend the input)
1461 *
1462 * @return the line
1463 */
1464 static String getLine(InputStream in, String needle, int relativeLine,
1465 boolean first) {
1466 String rep = null;
1467
1468 try {
1469 in.reset();
1470 } catch (IOException e) {
1471 Instance.syserr(e);
1472 }
1473
1474 List<String> lines = new ArrayList<String>();
1475 @SuppressWarnings("resource")
1476 Scanner scan = new Scanner(in, "UTF-8");
1477 int index = -1;
1478 scan.useDelimiter("\\n");
1479 while (scan.hasNext()) {
1480 lines.add(scan.next());
1481
1482 if (index == -1) {
1483 if (needle.startsWith("^")) {
1484 if (lines.get(lines.size() - 1).startsWith(
1485 needle.substring(1))) {
1486 index = lines.size() - 1;
1487 }
1488
1489 } else {
1490 if (lines.get(lines.size() - 1).contains(needle)) {
1491 index = lines.size() - 1;
1492 }
1493 }
1494 }
1495
1496 if (index >= 0 && index + relativeLine < lines.size()) {
1497 rep = lines.get(index + relativeLine);
1498 if (first) {
1499 break;
1500 }
1501 }
1502 }
1503
1504 return rep;
1505 }
1506
1507 /**
1508 * Return the text between the key and the endKey (and optional subKey can
1509 * be passed, in this case we will look for the key first, then take the
1510 * text between the subKey and the endKey).
1511 * <p>
1512 * Will only match the first line with the given key if more than one are
1513 * possible. Which also means that if the subKey or endKey is not found on
1514 * that line, NULL will be returned.
1515 *
1516 * @param in
1517 * the input
1518 * @param key
1519 * the key to match (also supports "^" at start to say
1520 * "only if it starts with" the key)
1521 * @param subKey
1522 * the sub key or NULL if none
1523 * @param endKey
1524 * the end key or NULL for "up to the end"
1525 * @return the text or NULL if not found
1526 */
1527 static String getKeyLine(InputStream in, String key, String subKey,
1528 String endKey) {
1529 String result = null;
1530
1531 String line = getLine(in, key, 0);
1532 if (line != null && line.contains(key)) {
1533 line = line.substring(line.indexOf(key) + key.length());
1534 if (subKey == null || subKey.isEmpty() || line.contains(subKey)) {
1535 if (subKey != null) {
1536 line = line.substring(line.indexOf(subKey)
1537 + subKey.length());
1538 }
1539 if (endKey == null || line.contains(endKey)) {
1540 if (endKey != null) {
1541 line = line.substring(0, line.indexOf(endKey));
1542 result = line;
1543 }
1544 }
1545 }
1546 }
1547
1548 return result;
1549 }
1550 }