Don't check against NULL on NotNull
[fanfix.git] / src / be / nikiroo / fanfix / supported / BasicSupport.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.awt.image.BufferedImage;
4 import java.io.BufferedReader;
5 import java.io.ByteArrayInputStream;
6 import java.io.File;
7 import java.io.IOException;
8 import java.io.InputStream;
9 import java.io.InputStreamReader;
10 import java.net.MalformedURLException;
11 import java.net.URL;
12 import java.util.ArrayList;
13 import java.util.Date;
14 import java.util.HashMap;
15 import java.util.List;
16 import java.util.Map;
17 import java.util.Map.Entry;
18 import java.util.Scanner;
19
20 import be.nikiroo.fanfix.Instance;
21 import be.nikiroo.fanfix.bundles.Config;
22 import be.nikiroo.fanfix.bundles.StringId;
23 import be.nikiroo.fanfix.data.Chapter;
24 import be.nikiroo.fanfix.data.MetaData;
25 import be.nikiroo.fanfix.data.Paragraph;
26 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
27 import be.nikiroo.fanfix.data.Story;
28 import be.nikiroo.utils.ImageUtils;
29 import be.nikiroo.utils.Progress;
30 import be.nikiroo.utils.StringUtils;
31
32 /**
 * This class is the base class used by the other support classes. It can be
 * used outside of this package, and has static methods that you can use to
 * get access to the correct support class.
36 * <p>
37 * It will be used with 'resources' (usually web pages or files).
38 *
39 * @author niki
40 */
41 public abstract class BasicSupport {
42 /**
43 * The supported input types for which we can get a {@link BasicSupport}
44 * object.
45 *
46 * @author niki
47 */
48 public enum SupportType {
49 /** EPUB files created with this program */
50 EPUB,
51 /** Pure text file with some rules */
52 TEXT,
53 /** TEXT but with associated .info file */
54 INFO_TEXT,
55 /** My Little Pony fanfictions */
56 FIMFICTION,
57 /** Fanfictions from a lot of different universes */
58 FANFICTION,
59 /** Website with lots of Mangas */
60 MANGAFOX,
61 /** Furry website with comics support */
62 E621,
63 /** Furry website with stories */
64 YIFFSTAR,
65 /** Comics and images groups, mostly but not only NSFW */
66 E_HENTAI,
67 /** CBZ files */
68 CBZ,
69 /** HTML files */
70 HTML;
71
72 /**
73 * A description of this support type (more information than the
74 * {@link BasicSupport#getSourceName()}).
75 *
76 * @return the description
77 */
78 public String getDesc() {
79 String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
80 this.name());
81
82 if (desc == null) {
83 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
84 }
85
86 return desc;
87 }
88
89 /**
90 * The name of this support type (a short version).
91 *
92 * @return the name
93 */
94 public String getSourceName() {
95 BasicSupport support = BasicSupport.getSupport(this);
96 if (support != null) {
97 return support.getSourceName();
98 }
99
100 return null;
101 }
102
103 @Override
104 public String toString() {
105 return super.toString().toLowerCase();
106 }
107
108 /**
109 * Call {@link SupportType#valueOf(String)} after conversion to upper
110 * case.
111 *
112 * @param typeName
113 * the possible type name
114 *
115 * @return NULL or the type
116 */
117 public static SupportType valueOfUC(String typeName) {
118 return SupportType.valueOf(typeName == null ? null : typeName
119 .toUpperCase());
120 }
121
122 /**
123 * Call {@link SupportType#valueOf(String)} after conversion to upper
124 * case but return NULL for NULL instead of raising exception.
125 *
126 * @param typeName
127 * the possible type name
128 *
129 * @return NULL or the type
130 */
131 public static SupportType valueOfNullOkUC(String typeName) {
132 if (typeName == null) {
133 return null;
134 }
135
136 return SupportType.valueOfUC(typeName);
137 }
138
139 /**
140 * Call {@link SupportType#valueOf(String)} after conversion to upper
141 * case but return NULL in case of error instead of raising an
142 * exception.
143 *
144 * @param typeName
145 * the possible type name
146 *
147 * @return NULL or the type
148 */
149 public static SupportType valueOfAllOkUC(String typeName) {
150 try {
151 return SupportType.valueOfUC(typeName);
152 } catch (Exception e) {
153 return null;
154 }
155 }
156 }
157
158 private InputStream in;
159 private SupportType type;
160 private URL currentReferer; // with only one 'r', as in 'HTTP'...
161
162 // quote chars
163 private char openQuote = Instance.getTrans().getCharacter(
164 StringId.OPEN_SINGLE_QUOTE);
165 private char closeQuote = Instance.getTrans().getCharacter(
166 StringId.CLOSE_SINGLE_QUOTE);
167 private char openDoubleQuote = Instance.getTrans().getCharacter(
168 StringId.OPEN_DOUBLE_QUOTE);
169 private char closeDoubleQuote = Instance.getTrans().getCharacter(
170 StringId.CLOSE_DOUBLE_QUOTE);
171
172 /**
173 * The name of this support class.
174 *
175 * @return the name
176 */
177 protected abstract String getSourceName();
178
179 /**
180 * Check if the given resource is supported by this {@link BasicSupport}.
181 *
182 * @param url
183 * the resource to check for
184 *
185 * @return TRUE if it is
186 */
187 protected abstract boolean supports(URL url);
188
189 /**
190 * Return TRUE if the support will return HTML encoded content values for
191 * the chapters content.
192 *
193 * @return TRUE for HTML
194 */
195 protected abstract boolean isHtml();
196
197 /**
198 * Return the {@link MetaData} of this story.
199 *
200 * @param source
201 * the source of the story
202 * @param in
203 * the input (the main resource)
204 *
205 * @return the associated {@link MetaData}, never NULL
206 *
207 * @throws IOException
208 * in case of I/O error
209 */
210 protected abstract MetaData getMeta(URL source, InputStream in)
211 throws IOException;
212
213 /**
214 * Return the story description.
215 *
216 * @param source
217 * the source of the story
218 * @param in
219 * the input (the main resource)
220 *
221 * @return the description
222 *
223 * @throws IOException
224 * in case of I/O error
225 */
226 protected abstract String getDesc(URL source, InputStream in)
227 throws IOException;
228
229 /**
230 * Return the list of chapters (name and resource).
231 *
232 * @param source
233 * the source of the story
234 * @param in
235 * the input (the main resource)
236 * @param pg
237 * the optional progress reporter
238 *
239 * @return the chapters
240 *
241 * @throws IOException
242 * in case of I/O error
243 */
244 protected abstract List<Entry<String, URL>> getChapters(URL source,
245 InputStream in, Progress pg) throws IOException;
246
247 /**
248 * Return the content of the chapter (possibly HTML encoded, if
249 * {@link BasicSupport#isHtml()} is TRUE).
250 *
251 * @param source
252 * the source of the story
253 * @param in
254 * the input (the main resource)
255 * @param number
256 * the chapter number
257 * @param pg
258 * the optional progress reporter
259 *
260 * @return the content
261 *
262 * @throws IOException
263 * in case of I/O error
264 */
265 protected abstract String getChapterContent(URL source, InputStream in,
266 int number, Progress pg) throws IOException;
267
268 /**
269 * Log into the support (can be a no-op depending upon the support).
270 *
271 * @throws IOException
272 * in case of I/O error
273 */
274 @SuppressWarnings("unused")
275 public void login() throws IOException {
276 }
277
278 /**
279 * Return the list of cookies (values included) that must be used to
280 * correctly fetch the resources.
281 * <p>
282 * You are expected to call the super method implementation if you override
283 * it.
284 *
285 * @return the cookies
286 */
287 public Map<String, String> getCookies() {
288 return new HashMap<String, String>();
289 }
290
291 /**
292 * OAuth authorisation (aka, "bearer XXXXXXX").
293 *
294 * @return the OAuth string
295 */
296 public String getOAuth() {
297 return null;
298 }
299
300 /**
301 * Return the canonical form of the main {@link URL}.
302 *
303 * @param source
304 * the source {@link URL}
305 *
306 * @return the canonical form of this {@link URL}
307 *
308 * @throws IOException
309 * in case of I/O error
310 */
311 @SuppressWarnings("unused")
312 public URL getCanonicalUrl(URL source) throws IOException {
313 return source;
314 }
315
316 /**
317 * Process the given story resource into a partially filled {@link Story}
318 * object containing the name and metadata, except for the description.
319 *
320 * @param url
321 * the story resource
322 *
323 * @return the {@link Story}
324 *
325 * @throws IOException
326 * in case of I/O error
327 */
328 public Story processMeta(URL url) throws IOException {
329 return processMeta(url, true, false, null);
330 }
331
332 /**
333 * Process the given story resource into a partially filled {@link Story}
334 * object containing the name and metadata.
335 *
336 * @param url
337 * the story resource
338 * @param close
339 * close "this" and "in" when done
340 * @param getDesc
341 * retrieve the description of the story, or not
342 * @param pg
343 * the optional progress reporter
344 *
345 * @return the {@link Story}, never NULL
346 *
347 * @throws IOException
348 * in case of I/O error
349 */
350 protected Story processMeta(URL url, boolean close, boolean getDesc,
351 Progress pg) throws IOException {
352 if (pg == null) {
353 pg = new Progress();
354 } else {
355 pg.setMinMax(0, 100);
356 }
357
358 login();
359 pg.setProgress(10);
360
361 url = getCanonicalUrl(url);
362
363 setCurrentReferer(url);
364
365 in = openInput(url); // NULL allowed here
366 try {
367 preprocess(url, getInput());
368 pg.setProgress(30);
369
370 Story story = new Story();
371 MetaData meta = getMeta(url, getInput());
372 if (meta.getCreationDate() == null
373 || meta.getCreationDate().isEmpty()) {
374 meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
375 }
376 story.setMeta(meta);
377
378 pg.setProgress(50);
379
380 if (meta.getCover() == null) {
381 meta.setCover(getDefaultCover(meta.getSubject()));
382 }
383
384 pg.setProgress(60);
385
386 if (getDesc) {
387 String descChapterName = Instance.getTrans().getString(
388 StringId.DESCRIPTION);
389 story.getMeta().setResume(
390 makeChapter(url, 0, descChapterName,
391 getDesc(url, getInput()), null));
392 }
393
394 pg.setProgress(100);
395 return story;
396 } finally {
397 if (close) {
398 try {
399 close();
400 } catch (IOException e) {
401 Instance.getTraceHandler().error(e);
402 }
403
404 if (in != null) {
405 in.close();
406 }
407 }
408
409 setCurrentReferer(null);
410 }
411 }
412
413 /**
414 * Process the given story resource into a fully filled {@link Story}
415 * object.
416 *
417 * @param url
418 * the story resource
419 * @param pg
420 * the optional progress reporter
421 *
422 * @return the {@link Story}, never NULL
423 *
424 * @throws IOException
425 * in case of I/O error
426 */
427 public Story process(URL url, Progress pg) throws IOException {
428 if (pg == null) {
429 pg = new Progress();
430 } else {
431 pg.setMinMax(0, 100);
432 }
433
434 url = getCanonicalUrl(url);
435 pg.setProgress(1);
436 try {
437 Progress pgMeta = new Progress();
438 pg.addProgress(pgMeta, 10);
439 Story story = processMeta(url, false, true, pgMeta);
440 if (!pgMeta.isDone()) {
441 pgMeta.setProgress(pgMeta.getMax()); // 10%
442 }
443
444 pg.setName("Retrieving " + story.getMeta().getTitle());
445
446 setCurrentReferer(url);
447
448 Progress pgGetChapters = new Progress();
449 pg.addProgress(pgGetChapters, 10);
450 story.setChapters(new ArrayList<Chapter>());
451 List<Entry<String, URL>> chapters = getChapters(url, getInput(),
452 pgGetChapters);
453 if (!pgGetChapters.isDone()) {
454 pgGetChapters.setProgress(pgGetChapters.getMax()); // 20%
455 }
456
457 if (chapters != null) {
458 Progress pgChaps = new Progress("Extracting chapters", 0,
459 chapters.size() * 300);
460 pg.addProgress(pgChaps, 80);
461
462 long words = 0;
463 int i = 1;
464 for (Entry<String, URL> chap : chapters) {
465 pgChaps.setName("Extracting chapter " + i);
466 InputStream chapIn = null;
467 if (chap.getValue() != null) {
468 setCurrentReferer(chap.getValue());
469 chapIn = Instance.getCache().open(chap.getValue(),
470 this, true);
471 }
472 pgChaps.setProgress(i * 100);
473 try {
474 Progress pgGetChapterContent = new Progress();
475 Progress pgMakeChapter = new Progress();
476 pgChaps.addProgress(pgGetChapterContent, 100);
477 pgChaps.addProgress(pgMakeChapter, 100);
478
479 String content = getChapterContent(url, chapIn, i,
480 pgGetChapterContent);
481 if (!pgGetChapterContent.isDone()) {
482 pgGetChapterContent.setProgress(pgGetChapterContent
483 .getMax());
484 }
485
486 Chapter cc = makeChapter(url, i, chap.getKey(),
487 content, pgMakeChapter);
488 if (!pgMakeChapter.isDone()) {
489 pgMakeChapter.setProgress(pgMakeChapter.getMax());
490 }
491
492 words += cc.getWords();
493 story.getChapters().add(cc);
494 story.getMeta().setWords(words);
495 } finally {
496 if (chapIn != null) {
497 chapIn.close();
498 }
499 }
500
501 i++;
502 }
503
504 pgChaps.setName("Extracting chapters");
505 } else {
506 pg.setProgress(80);
507 }
508
509 return story;
510
511 } finally {
512 try {
513 close();
514 } catch (IOException e) {
515 Instance.getTraceHandler().error(e);
516 }
517
518 if (in != null) {
519 in.close();
520 }
521
522 setCurrentReferer(null);
523 }
524 }
525
526 /**
527 * The support type.
528 *
529 * @return the type
530 */
531 public SupportType getType() {
532 return type;
533 }
534
535 /**
536 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
537 * the current {@link URL} we work on.
538 *
539 * @return the referer
540 */
541 public URL getCurrentReferer() {
542 return currentReferer;
543 }
544
545 /**
546 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
547 * the current {@link URL} we work on.
548 *
549 * @param currentReferer
550 * the new referer
551 */
552 protected void setCurrentReferer(URL currentReferer) {
553 this.currentReferer = currentReferer;
554 }
555
556 /**
557 * The support type.
558 *
559 * @param type
560 * the new type
561 *
562 * @return this
563 */
564 protected BasicSupport setType(SupportType type) {
565 this.type = type;
566 return this;
567 }
568
569 /**
570 * Prepare the support if needed before processing.
571 *
572 * @param source
573 * the source of the story
574 * @param in
575 * the input (the main resource)
576 *
577 * @throws IOException
578 * on I/O error
579 */
580 @SuppressWarnings("unused")
581 protected void preprocess(URL source, InputStream in) throws IOException {
582 }
583
584 /**
585 * Now that we have processed the {@link Story}, close the resources if any.
586 *
587 * @throws IOException
588 * on I/O error
589 */
590 @SuppressWarnings("unused")
591 protected void close() throws IOException {
592 }
593
594 /**
595 * Create a {@link Chapter} object from the given information, formatting
596 * the content as it should be.
597 *
598 * @param source
599 * the source of the story
600 * @param number
601 * the chapter number
602 * @param name
603 * the chapter name
604 * @param content
605 * the chapter content
606 * @param pg
607 * the optional progress reporter
608 *
609 * @return the {@link Chapter}
610 *
611 * @throws IOException
612 * in case of I/O error
613 */
614 protected Chapter makeChapter(URL source, int number, String name,
615 String content, Progress pg) throws IOException {
616 // Chapter name: process it correctly, then remove the possible
617 // redundant "Chapter x: " in front of it, or "-" (as in
618 // "Chapter 5: - Fun!" after the ": " was automatically added)
619 String chapterName = processPara(name).getContent().trim();
620 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
621 .split(",")) {
622 String chapterWord = Instance.getConfig().getStringX(
623 Config.CHAPTER, lang);
624 if (chapterName.startsWith(chapterWord)) {
625 chapterName = chapterName.substring(chapterWord.length())
626 .trim();
627 break;
628 }
629 }
630
631 if (chapterName.startsWith(Integer.toString(number))) {
632 chapterName = chapterName.substring(
633 Integer.toString(number).length()).trim();
634 }
635
636 while (chapterName.startsWith(":") || chapterName.startsWith("-")) {
637 chapterName = chapterName.substring(1).trim();
638 }
639 //
640
641 Chapter chap = new Chapter(number, chapterName);
642
643 if (content != null) {
644 List<Paragraph> paras = makeParagraphs(source, content, pg);
645 long words = 0;
646 for (Paragraph para : paras) {
647 words += para.getWords();
648 }
649 chap.setParagraphs(paras);
650 chap.setWords(words);
651 }
652
653 return chap;
654
655 }
656
657 /**
658 * Convert the given content into {@link Paragraph}s.
659 *
660 * @param source
661 * the source URL of the story
662 * @param content
663 * the textual content
664 * @param pg
665 * the optional progress reporter
666 *
667 * @return the {@link Paragraph}s
668 *
669 * @throws IOException
670 * in case of I/O error
671 */
672 protected List<Paragraph> makeParagraphs(URL source, String content,
673 Progress pg) throws IOException {
674 if (pg == null) {
675 pg = new Progress();
676 }
677
678 if (isHtml()) {
679 // Special <HR> processing:
680 content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
681 "<br/>* * *<br/>");
682 }
683
684 List<Paragraph> paras = new ArrayList<Paragraph>();
685
686 if (content != null && !content.trim().isEmpty()) {
687 if (isHtml()) {
688 String[] tab = content.split("(<p>|</p>|<br>|<br/>)");
689 pg.setMinMax(0, tab.length);
690 int i = 1;
691 for (String line : tab) {
692 if (line.startsWith("[") && line.endsWith("]")) {
693 pg.setName("Extracting image " + i);
694 }
695 paras.add(makeParagraph(source, line.trim()));
696 pg.setProgress(i++);
697 }
698 pg.setName(null);
699 } else {
700 List<String> lines = new ArrayList<String>();
701 BufferedReader buff = null;
702 try {
703 buff = new BufferedReader(
704 new InputStreamReader(new ByteArrayInputStream(
705 content.getBytes("UTF-8")), "UTF-8"));
706 for (String line = buff.readLine(); line != null; line = buff
707 .readLine()) {
708 lines.add(line.trim());
709 }
710 } finally {
711 if (buff != null) {
712 buff.close();
713 }
714 }
715
716 pg.setMinMax(0, lines.size());
717 int i = 0;
718 for (String line : lines) {
719 if (line.startsWith("[") && line.endsWith("]")) {
720 pg.setName("Extracting image " + i);
721 }
722 paras.add(makeParagraph(source, line));
723 pg.setProgress(i++);
724 }
725 pg.setName(null);
726 }
727
728 // Check quotes for "bad" format
729 List<Paragraph> newParas = new ArrayList<Paragraph>();
730 for (Paragraph para : paras) {
731 newParas.addAll(requotify(para));
732 }
733 paras = newParas;
734
735 // Remove double blanks/brks
736 fixBlanksBreaks(paras);
737 }
738
739 return paras;
740 }
741
742 /**
743 * Convert the given line into a single {@link Paragraph}.
744 *
745 * @param source
746 * the source URL of the story
747 * @param line
748 * the textual content of the paragraph
749 *
750 * @return the {@link Paragraph}
751 */
752 private Paragraph makeParagraph(URL source, String line) {
753 BufferedImage image = null;
754 if (line.startsWith("[") && line.endsWith("]")) {
755 image = getImage(this, source, line.substring(1, line.length() - 1)
756 .trim());
757 }
758
759 if (image != null) {
760 return new Paragraph(image);
761 }
762
763 return processPara(line);
764 }
765
766 /**
767 * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
768 * those {@link Paragraph}s.
769 * <p>
770 * The resulting list will not contain a starting or trailing blank/break
771 * nor 2 blanks or breaks following each other.
772 *
773 * @param paras
774 * the list of {@link Paragraph}s to fix
775 */
776 protected void fixBlanksBreaks(List<Paragraph> paras) {
777 boolean space = false;
778 boolean brk = true;
779 for (int i = 0; i < paras.size(); i++) {
780 Paragraph para = paras.get(i);
781 boolean thisSpace = para.getType() == ParagraphType.BLANK;
782 boolean thisBrk = para.getType() == ParagraphType.BREAK;
783
784 if (i > 0 && space && thisBrk) {
785 paras.remove(i - 1);
786 i--;
787 } else if ((space || brk) && (thisSpace || thisBrk)) {
788 paras.remove(i);
789 i--;
790 }
791
792 space = thisSpace;
793 brk = thisBrk;
794 }
795
796 // Remove blank/brk at start
797 if (paras.size() > 0
798 && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
799 0).getType() == ParagraphType.BREAK)) {
800 paras.remove(0);
801 }
802
803 // Remove blank/brk at end
804 int last = paras.size() - 1;
805 if (paras.size() > 0
806 && (paras.get(last).getType() == ParagraphType.BLANK || paras
807 .get(last).getType() == ParagraphType.BREAK)) {
808 paras.remove(last);
809 }
810 }
811
812 /**
813 * Get the default cover related to this subject (see <tt>.info</tt> files).
814 *
815 * @param subject
816 * the subject
817 *
818 * @return the cover if any, or NULL
819 */
820 static BufferedImage getDefaultCover(String subject) {
821 if (subject != null && !subject.isEmpty()
822 && Instance.getCoverDir() != null) {
823 try {
824 File fileCover = new File(Instance.getCoverDir(), subject);
825 return getImage(null, fileCover.toURI().toURL(), subject);
826 } catch (MalformedURLException e) {
827 }
828 }
829
830 return null;
831 }
832
833 /**
834 * Return the list of supported image extensions.
835 *
836 * @param emptyAllowed
837 * TRUE to allow an empty extension on first place, which can be
838 * used when you may already have an extension in your input but
839 * are not sure about it
840 *
841 * @return the extensions
842 */
843 static String[] getImageExt(boolean emptyAllowed) {
844 if (emptyAllowed) {
845 return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
846 }
847
848 return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
849 }
850
851 /**
852 * Check if the given resource can be a local image or a remote image, then
853 * refresh the cache with it if it is.
854 *
855 * @param source
856 * the story source
857 * @param line
858 * the resource to check
859 *
860 * @return the image if found, or NULL
861 *
862 */
863 static BufferedImage getImage(BasicSupport support, URL source, String line) {
864 URL url = getImageUrl(support, source, line);
865 if (url != null) {
866 InputStream in = null;
867 try {
868 in = Instance.getCache().open(url, getSupport(url), true);
869 return ImageUtils.fromStream(in);
870 } catch (IOException e) {
871 } finally {
872 if (in != null) {
873 try {
874 in.close();
875 } catch (IOException e) {
876 }
877 }
878 }
879 }
880
881 return null;
882 }
883
884 /**
885 * Check if the given resource can be a local image or a remote image, then
886 * refresh the cache with it if it is.
887 *
888 * @param source
889 * the story source
890 * @param line
891 * the resource to check
892 *
893 * @return the image URL if found, or NULL
894 *
895 */
896 static URL getImageUrl(BasicSupport support, URL source, String line) {
897 URL url = null;
898
899 if (line != null) {
900 // try for files
901 if (source != null) {
902 try {
903
904 String relPath = null;
905 String absPath = null;
906 try {
907 String path = new File(source.getFile()).getParent();
908 relPath = new File(new File(path), line.trim())
909 .getAbsolutePath();
910 } catch (Exception e) {
911 // Cannot be converted to path (one possibility to take
912 // into account: absolute path on Windows)
913 }
914 try {
915 absPath = new File(line.trim()).getAbsolutePath();
916 } catch (Exception e) {
917 // Cannot be converted to path (at all)
918 }
919
920 for (String ext : getImageExt(true)) {
921 if (absPath != null && new File(absPath + ext).exists()) {
922 url = new File(absPath + ext).toURI().toURL();
923 } else if (relPath != null
924 && new File(relPath + ext).exists()) {
925 url = new File(relPath + ext).toURI().toURL();
926 }
927 }
928 } catch (Exception e) {
929 // Should not happen since we control the correct arguments
930 }
931 }
932
933 if (url == null) {
934 // try for URLs
935 try {
936 for (String ext : getImageExt(true)) {
937 if (Instance.getCache()
938 .check(new URL(line + ext), true)) {
939 url = new URL(line + ext);
940 break;
941 }
942 }
943
944 // try out of cache
945 if (url == null) {
946 for (String ext : getImageExt(true)) {
947 try {
948 url = new URL(line + ext);
949 Instance.getCache().refresh(url, support, true);
950 break;
951 } catch (IOException e) {
952 // no image with this ext
953 url = null;
954 }
955 }
956 }
957 } catch (MalformedURLException e) {
958 // Not an url
959 }
960 }
961
962 // refresh the cached file
963 if (url != null) {
964 try {
965 Instance.getCache().refresh(url, support, true);
966 } catch (IOException e) {
967 // woops, broken image
968 url = null;
969 }
970 }
971 }
972
973 return url;
974 }
975
976 /**
977 * Open the input file that will be used through the support.
978 * <p>
979 * Can return NULL, in which case you are supposed to work without an
980 * {@link InputStream}.
981 *
982 * @param source
983 * the source {@link URL}
984 *
985 * @return the {@link InputStream}
986 *
987 * @throws IOException
988 * in case of I/O error
989 */
990 protected InputStream openInput(URL source) throws IOException {
991 return Instance.getCache().open(source, this, false);
992 }
993
994 /**
995 * Reset then return {@link BasicSupport#in}.
996 *
997 * @return {@link BasicSupport#in}
998 */
999 protected InputStream getInput() {
1000 return reset(in);
1001 }
1002
1003 /**
1004 * Fix the author name if it is prefixed with some "by" {@link String}.
1005 *
1006 * @param author
1007 * the author with a possible prefix
1008 *
1009 * @return the author without prefixes
1010 */
1011 protected String fixAuthor(String author) {
1012 if (author != null) {
1013 for (String suffix : new String[] { " ", ":" }) {
1014 for (String byString : Instance.getConfig()
1015 .getString(Config.BYS).split(",")) {
1016 byString += suffix;
1017 if (author.toUpperCase().startsWith(byString.toUpperCase())) {
1018 author = author.substring(byString.length()).trim();
1019 }
1020 }
1021 }
1022
1023 // Special case (without suffix):
1024 if (author.startsWith("©")) {
1025 author = author.substring(1);
1026 }
1027 }
1028
1029 return author;
1030 }
1031
1032 /**
1033 * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
1034 * and requotify them (i.e., separate them into QUOTE paragraphs and other
1035 * paragraphs (quotes or not)).
1036 *
1037 * @param para
1038 * the paragraph to requotify (not necessarily a quote)
1039 *
1040 * @return the correctly (or so we hope) quotified paragraphs
1041 */
1042 protected List<Paragraph> requotify(Paragraph para) {
1043 List<Paragraph> newParas = new ArrayList<Paragraph>();
1044
1045 if (para.getType() == ParagraphType.QUOTE
1046 && para.getContent().length() > 2) {
1047 String line = para.getContent();
1048 boolean singleQ = line.startsWith("" + openQuote);
1049 boolean doubleQ = line.startsWith("" + openDoubleQuote);
1050
1051 // Do not try when more than one quote at a time
1052 // (some stories are not easily readable if we do)
1053 if (singleQ
1054 && line.indexOf(closeQuote, 1) < line
1055 .lastIndexOf(closeQuote)) {
1056 newParas.add(para);
1057 return newParas;
1058 }
1059 if (doubleQ
1060 && line.indexOf(closeDoubleQuote, 1) < line
1061 .lastIndexOf(closeDoubleQuote)) {
1062 newParas.add(para);
1063 return newParas;
1064 }
1065 //
1066
1067 if (!singleQ && !doubleQ) {
1068 line = openDoubleQuote + line + closeDoubleQuote;
1069 newParas.add(new Paragraph(ParagraphType.QUOTE, line, para
1070 .getWords()));
1071 } else {
1072 char open = singleQ ? openQuote : openDoubleQuote;
1073 char close = singleQ ? closeQuote : closeDoubleQuote;
1074
1075 int posDot = -1;
1076 boolean inQuote = false;
1077 int i = 0;
1078 for (char car : line.toCharArray()) {
1079 if (car == open) {
1080 inQuote = true;
1081 } else if (car == close) {
1082 inQuote = false;
1083 } else if (car == '.' && !inQuote) {
1084 posDot = i;
1085 break;
1086 }
1087 i++;
1088 }
1089
1090 if (posDot >= 0) {
1091 String rest = line.substring(posDot + 1).trim();
1092 line = line.substring(0, posDot + 1).trim();
1093 long words = 1;
1094 for (char car : line.toCharArray()) {
1095 if (car == ' ') {
1096 words++;
1097 }
1098 }
1099 newParas.add(new Paragraph(ParagraphType.QUOTE, line, words));
1100 if (!rest.isEmpty()) {
1101 newParas.addAll(requotify(processPara(rest)));
1102 }
1103 } else {
1104 newParas.add(para);
1105 }
1106 }
1107 } else {
1108 newParas.add(para);
1109 }
1110
1111 return newParas;
1112 }
1113
1114 /**
1115 * Process a {@link Paragraph} from a raw line of text.
1116 * <p>
1117 * Will also fix quotes and HTML encoding if needed.
1118 *
1119 * @param line
1120 * the raw line
1121 *
1122 * @return the processed {@link Paragraph}
1123 */
1124 protected Paragraph processPara(String line) {
1125 line = ifUnhtml(line).trim();
1126
1127 boolean space = true;
1128 boolean brk = true;
1129 boolean quote = false;
1130 boolean tentativeCloseQuote = false;
1131 char prev = '\0';
1132 int dashCount = 0;
1133 long words = 1;
1134
1135 StringBuilder builder = new StringBuilder();
1136 for (char car : line.toCharArray()) {
1137 if (car != '-') {
1138 if (dashCount > 0) {
1139 // dash, ndash and mdash: - – —
1140 // currently: always use mdash
1141 builder.append(dashCount == 1 ? '-' : '—');
1142 }
1143 dashCount = 0;
1144 }
1145
1146 if (tentativeCloseQuote) {
1147 tentativeCloseQuote = false;
1148 if (Character.isLetterOrDigit(car)) {
1149 builder.append("'");
1150 } else {
1151 // handle double-single quotes as double quotes
1152 if (prev == car) {
1153 builder.append(closeDoubleQuote);
1154 continue;
1155 }
1156
1157 builder.append(closeQuote);
1158 }
1159 }
1160
1161 switch (car) {
1162 case ' ': // note: unbreakable space
1163 case ' ':
1164 case '\t':
1165 case '\n': // just in case
1166 case '\r': // just in case
1167 if (builder.length() > 0
1168 && builder.charAt(builder.length() - 1) != ' ') {
1169 words++;
1170 }
1171 builder.append(' ');
1172 break;
1173
1174 case '\'':
1175 if (space || (brk && quote)) {
1176 quote = true;
1177 // handle double-single quotes as double quotes
1178 if (prev == car) {
1179 builder.deleteCharAt(builder.length() - 1);
1180 builder.append(openDoubleQuote);
1181 } else {
1182 builder.append(openQuote);
1183 }
1184 } else if (prev == ' ' || prev == car) {
1185 // handle double-single quotes as double quotes
1186 if (prev == car) {
1187 builder.deleteCharAt(builder.length() - 1);
1188 builder.append(openDoubleQuote);
1189 } else {
1190 builder.append(openQuote);
1191 }
1192 } else {
1193 // it is a quote ("I'm off") or a 'quote' ("This
1194 // 'good' restaurant"...)
1195 tentativeCloseQuote = true;
1196 }
1197 break;
1198
1199 case '"':
1200 if (space || (brk && quote)) {
1201 quote = true;
1202 builder.append(openDoubleQuote);
1203 } else if (prev == ' ') {
1204 builder.append(openDoubleQuote);
1205 } else {
1206 builder.append(closeDoubleQuote);
1207 }
1208 break;
1209
1210 case '-':
1211 if (space) {
1212 quote = true;
1213 } else {
1214 dashCount++;
1215 }
1216 space = false;
1217 break;
1218
1219 case '*':
1220 case '~':
1221 case '/':
1222 case '\\':
1223 case '<':
1224 case '>':
1225 case '=':
1226 case '+':
1227 case '_':
1228 case '–':
1229 case '—':
1230 space = false;
1231 builder.append(car);
1232 break;
1233
1234 case '‘':
1235 case '`':
1236 case '‹':
1237 case '﹁':
1238 case '〈':
1239 case '「':
1240 if (space || (brk && quote)) {
1241 quote = true;
1242 builder.append(openQuote);
1243 } else {
1244 // handle double-single quotes as double quotes
1245 if (prev == car) {
1246 builder.deleteCharAt(builder.length() - 1);
1247 builder.append(openDoubleQuote);
1248 } else {
1249 builder.append(openQuote);
1250 }
1251 }
1252 space = false;
1253 brk = false;
1254 break;
1255
1256 case '’':
1257 case '›':
1258 case '﹂':
1259 case '〉':
1260 case '」':
1261 space = false;
1262 brk = false;
1263 // handle double-single quotes as double quotes
1264 if (prev == car) {
1265 builder.deleteCharAt(builder.length() - 1);
1266 builder.append(closeDoubleQuote);
1267 } else {
1268 builder.append(closeQuote);
1269 }
1270 break;
1271
1272 case '«':
1273 case '“':
1274 case '﹃':
1275 case '《':
1276 case '『':
1277 if (space || (brk && quote)) {
1278 quote = true;
1279 builder.append(openDoubleQuote);
1280 } else {
1281 builder.append(openDoubleQuote);
1282 }
1283 space = false;
1284 brk = false;
1285 break;
1286
1287 case '»':
1288 case '”':
1289 case '﹄':
1290 case '》':
1291 case '』':
1292 space = false;
1293 brk = false;
1294 builder.append(closeDoubleQuote);
1295 break;
1296
1297 default:
1298 space = false;
1299 brk = false;
1300 builder.append(car);
1301 break;
1302 }
1303
1304 prev = car;
1305 }
1306
1307 if (tentativeCloseQuote) {
1308 tentativeCloseQuote = false;
1309 builder.append(closeQuote);
1310 }
1311
1312 line = builder.toString().trim();
1313
1314 ParagraphType type = ParagraphType.NORMAL;
1315 if (space) {
1316 type = ParagraphType.BLANK;
1317 } else if (brk) {
1318 type = ParagraphType.BREAK;
1319 } else if (quote) {
1320 type = ParagraphType.QUOTE;
1321 }
1322
1323 return new Paragraph(type, line, words);
1324 }
1325
1326 /**
1327 * Remove the HTML from the input <b>if</b> {@link BasicSupport#isHtml()} is
1328 * true.
1329 *
1330 * @param input
1331 * the input
1332 *
1333 * @return the no html version if needed
1334 */
1335 private String ifUnhtml(String input) {
1336 if (isHtml() && input != null) {
1337 return StringUtils.unhtml(input);
1338 }
1339
1340 return input;
1341 }
1342
1343 /**
1344 * Return a {@link BasicSupport} implementation supporting the given
1345 * resource if possible.
1346 *
1347 * @param url
1348 * the story resource
1349 *
1350 * @return an implementation that supports it, or NULL
1351 */
1352 public static BasicSupport getSupport(URL url) {
1353 if (url == null) {
1354 return null;
1355 }
1356
1357 // TEXT and INFO_TEXT always support files (not URLs though)
1358 for (SupportType type : SupportType.values()) {
1359 if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1360 BasicSupport support = getSupport(type);
1361 if (support != null && support.supports(url)) {
1362 return support;
1363 }
1364 }
1365 }
1366
1367 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
1368 SupportType.TEXT }) {
1369 BasicSupport support = getSupport(type);
1370 if (support != null && support.supports(url)) {
1371 return support;
1372 }
1373 }
1374
1375 return null;
1376 }
1377
1378 /**
1379 * Return a {@link BasicSupport} implementation supporting the given type.
1380 *
1381 * @param type
1382 * the type
1383 *
1384 * @return an implementation that supports it, or NULL
1385 */
1386 public static BasicSupport getSupport(SupportType type) {
1387 switch (type) {
1388 case EPUB:
1389 return new Epub().setType(type);
1390 case INFO_TEXT:
1391 return new InfoText().setType(type);
1392 case FIMFICTION:
1393 try {
1394 // Can fail if no client key or NO in options
1395 return new FimfictionApi().setType(type);
1396 } catch (IOException e) {
1397 return new Fimfiction().setType(type);
1398 }
1399 case FANFICTION:
1400 return new Fanfiction().setType(type);
1401 case TEXT:
1402 return new Text().setType(type);
1403 case MANGAFOX:
1404 return new MangaFox().setType(type);
1405 case E621:
1406 return new E621().setType(type);
1407 case YIFFSTAR:
1408 return new YiffStar().setType(type);
1409 case E_HENTAI:
1410 return new EHentai().setType(type);
1411 case CBZ:
1412 return new Cbz().setType(type);
1413 case HTML:
1414 return new Html().setType(type);
1415 }
1416
1417 return null;
1418 }
1419
1420 /**
1421 * Reset the given {@link InputStream} and return it.
1422 *
1423 * @param in
1424 * the {@link InputStream} to reset
1425 *
1426 * @return the same {@link InputStream} after reset
1427 */
1428 static protected InputStream reset(InputStream in) {
1429 try {
1430 if (in != null) {
1431 in.reset();
1432 }
1433 } catch (IOException e) {
1434 }
1435
1436 return in;
1437 }
1438
1439 /**
1440 * Return the first line from the given input which correspond to the given
1441 * selectors.
1442 *
1443 * @param in
1444 * the input
1445 * @param needle
1446 * a string that must be found inside the target line (also
1447 * supports "^" at start to say "only if it starts with" the
1448 * needle)
1449 * @param relativeLine
1450 * the line to return based upon the target line position (-1 =
1451 * the line before, 0 = the target line...)
1452 *
1453 * @return the line
1454 */
1455 static protected String getLine(InputStream in, String needle,
1456 int relativeLine) {
1457 return getLine(in, needle, relativeLine, true);
1458 }
1459
1460 /**
1461 * Return a line from the given input which correspond to the given
1462 * selectors.
1463 *
1464 * @param in
1465 * the input
1466 * @param needle
1467 * a string that must be found inside the target line (also
1468 * supports "^" at start to say "only if it starts with" the
1469 * needle)
1470 * @param relativeLine
1471 * the line to return based upon the target line position (-1 =
1472 * the line before, 0 = the target line...)
1473 * @param first
1474 * takes the first result (as opposed to the last one, which will
1475 * also always spend the input)
1476 *
1477 * @return the line
1478 */
1479 static protected String getLine(InputStream in, String needle,
1480 int relativeLine, boolean first) {
1481 String rep = null;
1482
1483 reset(in);
1484
1485 List<String> lines = new ArrayList<String>();
1486 @SuppressWarnings("resource")
1487 Scanner scan = new Scanner(in, "UTF-8");
1488 int index = -1;
1489 scan.useDelimiter("\\n");
1490 while (scan.hasNext()) {
1491 lines.add(scan.next());
1492
1493 if (index == -1) {
1494 if (needle.startsWith("^")) {
1495 if (lines.get(lines.size() - 1).startsWith(
1496 needle.substring(1))) {
1497 index = lines.size() - 1;
1498 }
1499
1500 } else {
1501 if (lines.get(lines.size() - 1).contains(needle)) {
1502 index = lines.size() - 1;
1503 }
1504 }
1505 }
1506
1507 if (index >= 0 && index + relativeLine < lines.size()) {
1508 rep = lines.get(index + relativeLine);
1509 if (first) {
1510 break;
1511 }
1512 }
1513 }
1514
1515 return rep;
1516 }
1517
1518 /**
1519 * Return the text between the key and the endKey (and optional subKey can
1520 * be passed, in this case we will look for the key first, then take the
1521 * text between the subKey and the endKey).
1522 * <p>
1523 * Will only match the first line with the given key if more than one are
1524 * possible. Which also means that if the subKey or endKey is not found on
1525 * that line, NULL will be returned.
1526 *
1527 * @param in
1528 * the input
1529 * @param key
1530 * the key to match (also supports "^" at start to say
1531 * "only if it starts with" the key)
1532 * @param subKey
1533 * the sub key or NULL if none
1534 * @param endKey
1535 * the end key or NULL for "up to the end"
1536 * @return the text or NULL if not found
1537 */
1538 static protected String getKeyLine(InputStream in, String key,
1539 String subKey, String endKey) {
1540 return getKeyText(getLine(in, key, 0), key, subKey, endKey);
1541 }
1542
1543 /**
1544 * Return the text between the key and the endKey (and optional subKey can
1545 * be passed, in this case we will look for the key first, then take the
1546 * text between the subKey and the endKey).
1547 *
1548 * @param in
1549 * the input
1550 * @param key
1551 * the key to match (also supports "^" at start to say
1552 * "only if it starts with" the key)
1553 * @param subKey
1554 * the sub key or NULL if none
1555 * @param endKey
1556 * the end key or NULL for "up to the end"
1557 * @return the text or NULL if not found
1558 */
1559 static protected String getKeyText(String in, String key, String subKey,
1560 String endKey) {
1561 String result = null;
1562
1563 String line = in;
1564 if (line != null && line.contains(key)) {
1565 line = line.substring(line.indexOf(key) + key.length());
1566 if (subKey == null || subKey.isEmpty() || line.contains(subKey)) {
1567 if (subKey != null) {
1568 line = line.substring(line.indexOf(subKey)
1569 + subKey.length());
1570 }
1571 if (endKey == null || line.contains(endKey)) {
1572 if (endKey != null) {
1573 line = line.substring(0, line.indexOf(endKey));
1574 result = line;
1575 }
1576 }
1577 }
1578 }
1579
1580 return result;
1581 }
1582
1583 /**
1584 * Return the text between the key and the endKey (optional subKeys can be
1585 * passed, in this case we will look for the subKeys first, then take the
1586 * text between the key and the endKey).
1587 *
1588 * @param in
1589 * the input
1590 * @param key
1591 * the key to match
1592 * @param endKey
1593 * the end key or NULL for "up to the end"
1594 * @param afters
1595 * the sub-keys to find before checking for key/endKey
1596 *
1597 * @return the text or NULL if not found
1598 */
1599 static protected String getKeyTextAfter(String in, String key,
1600 String endKey, String... afters) {
1601
1602 if (in != null && !in.isEmpty()) {
1603 int pos = indexOfAfter(in, 0, afters);
1604 if (pos < 0) {
1605 return null;
1606 }
1607
1608 in = in.substring(pos);
1609 }
1610
1611 return getKeyText(in, key, null, endKey);
1612 }
1613
1614 /**
1615 * Return the first index after all the given "afters" have been found in
1616 * the {@link String}, or -1 if it was not possible.
1617 *
1618 * @param in
1619 * the input
1620 * @param startAt
1621 * start at this position in the string
1622 * @param afters
1623 * the sub-keys to find before checking for key/endKey
1624 *
1625 * @return the text or NULL if not found
1626 */
1627 static protected int indexOfAfter(String in, int startAt, String... afters) {
1628 int pos = -1;
1629 if (in != null && !in.isEmpty()) {
1630 pos = startAt;
1631 if (afters != null) {
1632 for (int i = 0; pos >= 0 && i < afters.length; i++) {
1633 String subKey = afters[i];
1634 if (!subKey.isEmpty()) {
1635 pos = in.indexOf(subKey, pos);
1636 if (pos >= 0) {
1637 pos += subKey.length();
1638 }
1639 }
1640 }
1641 }
1642 }
1643
1644 return pos;
1645 }
1646 }