New FimFiction.net API downloading:
[fanfix.git] / src / be / nikiroo / fanfix / supported / BasicSupport.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.awt.image.BufferedImage;
4 import java.io.BufferedReader;
5 import java.io.ByteArrayInputStream;
6 import java.io.File;
7 import java.io.IOException;
8 import java.io.InputStream;
9 import java.io.InputStreamReader;
10 import java.net.MalformedURLException;
11 import java.net.URL;
12 import java.util.ArrayList;
13 import java.util.Date;
14 import java.util.HashMap;
15 import java.util.List;
16 import java.util.Map;
17 import java.util.Map.Entry;
18 import java.util.Scanner;
19
20 import be.nikiroo.fanfix.Instance;
21 import be.nikiroo.fanfix.bundles.Config;
22 import be.nikiroo.fanfix.bundles.StringId;
23 import be.nikiroo.fanfix.data.Chapter;
24 import be.nikiroo.fanfix.data.MetaData;
25 import be.nikiroo.fanfix.data.Paragraph;
26 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
27 import be.nikiroo.fanfix.data.Story;
28 import be.nikiroo.utils.ImageUtils;
29 import be.nikiroo.utils.Progress;
30 import be.nikiroo.utils.StringUtils;
31
32 /**
33 * This class is the base class used by the other support classes. It can be
34 * used outside of this package, and has static methods that you can use to
35 * get access to the correct support class.
36 * <p>
37 * It will be used with 'resources' (usually web pages or files).
38 *
39 * @author niki
40 */
41 public abstract class BasicSupport {
42 /**
43 * The supported input types for which we can get a {@link BasicSupport}
44 * object.
45 *
46 * @author niki
47 */
48 public enum SupportType {
49 /** EPUB files created with this program */
50 EPUB,
51 /** Pure text file with some rules */
52 TEXT,
53 /** TEXT but with associated .info file */
54 INFO_TEXT,
55 /** My Little Pony fanfictions */
56 FIMFICTION,
57 /** Fanfictions from a lot of different universes */
58 FANFICTION,
59 /** Website with lots of Mangas */
60 MANGAFOX,
61 /** Furry website with comics support */
62 E621,
63 /** Furry website with stories */
64 YIFFSTAR,
65 /** Comics and images groups, mostly but not only NSFW */
66 E_HENTAI,
67 /** CBZ files */
68 CBZ,
69 /** HTML files */
70 HTML;
71
72 /**
73 * A description of this support type (more information than the
74 * {@link BasicSupport#getSourceName()}).
75 *
76 * @return the description
77 */
78 public String getDesc() {
79 String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
80 this.name());
81
82 if (desc == null) {
83 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
84 }
85
86 return desc;
87 }
88
89 /**
90 * The name of this support type (a short version).
91 *
92 * @return the name
93 */
94 public String getSourceName() {
95 BasicSupport support = BasicSupport.getSupport(this);
96 if (support != null) {
97 return support.getSourceName();
98 }
99
100 return null;
101 }
102
103 @Override
104 public String toString() {
105 return super.toString().toLowerCase();
106 }
107
108 /**
109 * Call {@link SupportType#valueOf(String)} after conversion to upper
110 * case.
111 *
112 * @param typeName
113 * the possible type name
114 *
115 * @return NULL or the type
116 */
117 public static SupportType valueOfUC(String typeName) {
118 return SupportType.valueOf(typeName == null ? null : typeName
119 .toUpperCase());
120 }
121
122 /**
123 * Call {@link SupportType#valueOf(String)} after conversion to upper
124 * case but return NULL for NULL instead of raising exception.
125 *
126 * @param typeName
127 * the possible type name
128 *
129 * @return NULL or the type
130 */
131 public static SupportType valueOfNullOkUC(String typeName) {
132 if (typeName == null) {
133 return null;
134 }
135
136 return SupportType.valueOfUC(typeName);
137 }
138
139 /**
140 * Call {@link SupportType#valueOf(String)} after conversion to upper
141 * case but return NULL in case of error instead of raising an
142 * exception.
143 *
144 * @param typeName
145 * the possible type name
146 *
147 * @return NULL or the type
148 */
149 public static SupportType valueOfAllOkUC(String typeName) {
150 try {
151 return SupportType.valueOfUC(typeName);
152 } catch (Exception e) {
153 return null;
154 }
155 }
156 }
157
158 private InputStream in;
159 private SupportType type;
160 private URL currentReferer; // with only one 'r', as in 'HTTP'...
161
162 // quote chars
163 private char openQuote = Instance.getTrans().getCharacter(
164 StringId.OPEN_SINGLE_QUOTE);
165 private char closeQuote = Instance.getTrans().getCharacter(
166 StringId.CLOSE_SINGLE_QUOTE);
167 private char openDoubleQuote = Instance.getTrans().getCharacter(
168 StringId.OPEN_DOUBLE_QUOTE);
169 private char closeDoubleQuote = Instance.getTrans().getCharacter(
170 StringId.CLOSE_DOUBLE_QUOTE);
171
172 /**
173 * The name of this support class.
174 *
175 * @return the name
176 */
177 protected abstract String getSourceName();
178
179 /**
180 * Check if the given resource is supported by this {@link BasicSupport}.
181 *
182 * @param url
183 * the resource to check for
184 *
185 * @return TRUE if it is
186 */
187 protected abstract boolean supports(URL url);
188
189 /**
190 * Return TRUE if the support will return HTML encoded content values for
191 * the chapters content.
192 *
193 * @return TRUE for HTML
194 */
195 protected abstract boolean isHtml();
196
197 /**
198 * Return the {@link MetaData} of this story.
199 *
200 * @param source
201 * the source of the story
202 * @param in
203 * the input (the main resource)
204 *
205 * @return the associated {@link MetaData}
206 *
207 * @throws IOException
208 * in case of I/O error
209 */
210 protected abstract MetaData getMeta(URL source, InputStream in)
211 throws IOException;
212
213 /**
214 * Return the story description.
215 *
216 * @param source
217 * the source of the story
218 * @param in
219 * the input (the main resource)
220 *
221 * @return the description
222 *
223 * @throws IOException
224 * in case of I/O error
225 */
226 protected abstract String getDesc(URL source, InputStream in)
227 throws IOException;
228
229 /**
230 * Return the list of chapters (name and resource).
231 *
232 * @param source
233 * the source of the story
234 * @param in
235 * the input (the main resource)
236 * @param pg
237 * the optional progress reporter
238 *
239 * @return the chapters
240 *
241 * @throws IOException
242 * in case of I/O error
243 */
244 protected abstract List<Entry<String, URL>> getChapters(URL source,
245 InputStream in, Progress pg) throws IOException;
246
247 /**
248 * Return the content of the chapter (possibly HTML encoded, if
249 * {@link BasicSupport#isHtml()} is TRUE).
250 *
251 * @param source
252 * the source of the story
253 * @param in
254 * the input (the main resource)
255 * @param number
256 * the chapter number
257 * @param pg
258 * the optional progress reporter
259 *
260 * @return the content
261 *
262 * @throws IOException
263 * in case of I/O error
264 */
265 protected abstract String getChapterContent(URL source, InputStream in,
266 int number, Progress pg) throws IOException;
267
268 /**
269 * Log into the support (can be a no-op depending upon the support).
270 *
271 * @throws IOException
272 * in case of I/O error
273 */
274 @SuppressWarnings("unused")
275 public void login() throws IOException {
276 }
277
278 /**
279 * Return the list of cookies (values included) that must be used to
280 * correctly fetch the resources.
281 * <p>
282 * You are expected to call the super method implementation if you override
283 * it.
284 *
285 * @return the cookies
286 */
287 public Map<String, String> getCookies() {
288 return new HashMap<String, String>();
289 }
290
291 /**
292 * OAuth authorisation (aka, "bearer XXXXXXX").
293 *
294 * @return the OAuth string
295 */
296 public String getOAuth() {
297 return null;
298 }
299
300 /**
301 * Return the canonical form of the main {@link URL}.
302 *
303 * @param source
304 * the source {@link URL}
305 *
306 * @return the canonical form of this {@link URL}
307 *
308 * @throws IOException
309 * in case of I/O error
310 */
311 @SuppressWarnings("unused")
312 public URL getCanonicalUrl(URL source) throws IOException {
313 return source;
314 }
315
316 /**
317 * Process the given story resource into a partially filled {@link Story}
318 * object containing the name and metadata, except for the description.
319 *
320 * @param url
321 * the story resource
322 *
323 * @return the {@link Story}
324 *
325 * @throws IOException
326 * in case of I/O error
327 */
328 public Story processMeta(URL url) throws IOException {
329 return processMeta(url, true, false, null);
330 }
331
332 /**
333 * Process the given story resource into a partially filled {@link Story}
334 * object containing the name and metadata.
335 *
336 * @param url
337 * the story resource
338 * @param close
339 * close "this" and "in" when done
340 * @param getDesc
341 * retrieve the description of the story, or not
342 * @param pg
343 * the optional progress reporter
344 *
345 * @return the {@link Story}
346 *
347 * @throws IOException
348 * in case of I/O error
349 */
350 protected Story processMeta(URL url, boolean close, boolean getDesc,
351 Progress pg) throws IOException {
352 if (pg == null) {
353 pg = new Progress();
354 } else {
355 pg.setMinMax(0, 100);
356 }
357
358 login();
359 pg.setProgress(10);
360
361 url = getCanonicalUrl(url);
362
363 setCurrentReferer(url);
364
365 in = openInput(url); // NULL allowed here
366 try {
367 preprocess(url, getInput());
368 pg.setProgress(30);
369
370 Story story = new Story();
371 MetaData meta = getMeta(url, getInput());
372 if (meta.getCreationDate() == null
373 || meta.getCreationDate().isEmpty()) {
374 meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
375 }
376 story.setMeta(meta);
377
378 pg.setProgress(50);
379
380 if (meta.getCover() == null) {
381 meta.setCover(getDefaultCover(meta.getSubject()));
382 }
383
384 pg.setProgress(60);
385
386 if (getDesc) {
387 String descChapterName = Instance.getTrans().getString(
388 StringId.DESCRIPTION);
389 story.getMeta().setResume(
390 makeChapter(url, 0, descChapterName,
391 getDesc(url, getInput()), null));
392 }
393
394 pg.setProgress(100);
395 return story;
396 } finally {
397 if (close) {
398 try {
399 close();
400 } catch (IOException e) {
401 Instance.syserr(e);
402 }
403
404 if (in != null) {
405 in.close();
406 }
407 }
408
409 setCurrentReferer(null);
410 }
411 }
412
413 /**
414 * Process the given story resource into a fully filled {@link Story}
415 * object.
416 *
417 * @param url
418 * the story resource
419 * @param pg
420 * the optional progress reporter
421 *
422 * @return the {@link Story}
423 *
424 * @throws IOException
425 * in case of I/O error
426 */
427 public Story process(URL url, Progress pg) throws IOException {
428 if (pg == null) {
429 pg = new Progress();
430 } else {
431 pg.setMinMax(0, 100);
432 }
433
434 url = getCanonicalUrl(url);
435 pg.setProgress(1);
436 try {
437 Progress pgMeta = new Progress();
438 pg.addProgress(pgMeta, 10);
439 Story story = processMeta(url, false, true, pgMeta);
440 if (!pgMeta.isDone()) {
441 pgMeta.setProgress(pgMeta.getMax()); // 10%
442 }
443
444 if (story == null) {
445 pg.setProgress(90);
446 return null;
447 }
448
449 pg.setName("Retrieving " + story.getMeta().getTitle());
450
451 setCurrentReferer(url);
452
453 Progress pgGetChapters = new Progress();
454 pg.addProgress(pgGetChapters, 10);
455 story.setChapters(new ArrayList<Chapter>());
456 List<Entry<String, URL>> chapters = getChapters(url, getInput(),
457 pgGetChapters);
458 if (!pgGetChapters.isDone()) {
459 pgGetChapters.setProgress(pgGetChapters.getMax()); // 20%
460 }
461
462 if (chapters != null) {
463 Progress pgChaps = new Progress("Extracting chapters", 0,
464 chapters.size() * 300);
465 pg.addProgress(pgChaps, 80);
466
467 long words = 0;
468 int i = 1;
469 for (Entry<String, URL> chap : chapters) {
470 pgChaps.setName("Extracting chapter " + i);
471 InputStream chapIn = null;
472 if (chap.getValue() != null) {
473 setCurrentReferer(chap.getValue());
474 chapIn = Instance.getCache().open(chap.getValue(),
475 this, true);
476 }
477 pgChaps.setProgress(i * 100);
478 try {
479 Progress pgGetChapterContent = new Progress();
480 Progress pgMakeChapter = new Progress();
481 pgChaps.addProgress(pgGetChapterContent, 100);
482 pgChaps.addProgress(pgMakeChapter, 100);
483
484 String content = getChapterContent(url, chapIn, i,
485 pgGetChapterContent);
486 if (!pgGetChapterContent.isDone()) {
487 pgGetChapterContent.setProgress(pgGetChapterContent
488 .getMax());
489 }
490
491 Chapter cc = makeChapter(url, i, chap.getKey(),
492 content, pgMakeChapter);
493 if (!pgMakeChapter.isDone()) {
494 pgMakeChapter.setProgress(pgMakeChapter.getMax());
495 }
496
497 words += cc.getWords();
498 story.getChapters().add(cc);
499 if (story.getMeta() != null) {
500 story.getMeta().setWords(words);
501 }
502 } finally {
503 if (chapIn != null) {
504 chapIn.close();
505 }
506 }
507
508 i++;
509 }
510
511 pgChaps.setName("Extracting chapters");
512 } else {
513 pg.setProgress(80);
514 }
515
516 return story;
517
518 } finally {
519 try {
520 close();
521 } catch (IOException e) {
522 Instance.syserr(e);
523 }
524
525 if (in != null) {
526 in.close();
527 }
528
529 setCurrentReferer(null);
530 }
531 }
532
533 /**
534 * The support type.
535 *
536 * @return the type
537 */
538 public SupportType getType() {
539 return type;
540 }
541
542 /**
543 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
544 * the current {@link URL} we work on.
545 *
546 * @return the referer
547 */
548 public URL getCurrentReferer() {
549 return currentReferer;
550 }
551
552 /**
553 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
554 * the current {@link URL} we work on.
555 *
556 * @param currentReferer
557 * the new referer
558 */
559 protected void setCurrentReferer(URL currentReferer) {
560 this.currentReferer = currentReferer;
561 }
562
563 /**
564 * The support type.
565 *
566 * @param type
567 * the new type
568 *
569 * @return this
570 */
571 protected BasicSupport setType(SupportType type) {
572 this.type = type;
573 return this;
574 }
575
576 /**
577 * Prepare the support if needed before processing.
578 *
579 * @param source
580 * the source of the story
581 * @param in
582 * the input (the main resource)
583 *
584 * @throws IOException
585 * on I/O error
586 */
587 @SuppressWarnings("unused")
588 protected void preprocess(URL source, InputStream in) throws IOException {
589 }
590
591 /**
592 * Now that we have processed the {@link Story}, close the resources if any.
593 *
594 * @throws IOException
595 * on I/O error
596 */
597 @SuppressWarnings("unused")
598 protected void close() throws IOException {
599 }
600
601 /**
602 * Create a {@link Chapter} object from the given information, formatting
603 * the content as it should be.
604 *
605 * @param source
606 * the source of the story
607 * @param number
608 * the chapter number
609 * @param name
610 * the chapter name
611 * @param content
612 * the chapter content
613 * @param pg
614 * the optional progress reporter
615 *
616 * @return the {@link Chapter}
617 *
618 * @throws IOException
619 * in case of I/O error
620 */
621 protected Chapter makeChapter(URL source, int number, String name,
622 String content, Progress pg) throws IOException {
623 // Chapter name: process it correctly, then remove the possible
624 // redundant "Chapter x: " in front of it, or "-" (as in
625 // "Chapter 5: - Fun!" after the ": " was automatically added)
626 String chapterName = processPara(name).getContent().trim();
627 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
628 .split(",")) {
629 String chapterWord = Instance.getConfig().getStringX(
630 Config.CHAPTER, lang);
631 if (chapterName.startsWith(chapterWord)) {
632 chapterName = chapterName.substring(chapterWord.length())
633 .trim();
634 break;
635 }
636 }
637
638 if (chapterName.startsWith(Integer.toString(number))) {
639 chapterName = chapterName.substring(
640 Integer.toString(number).length()).trim();
641 }
642
643 while (chapterName.startsWith(":") || chapterName.startsWith("-")) {
644 chapterName = chapterName.substring(1).trim();
645 }
646 //
647
648 Chapter chap = new Chapter(number, chapterName);
649
650 if (content != null) {
651 List<Paragraph> paras = makeParagraphs(source, content, pg);
652 long words = 0;
653 for (Paragraph para : paras) {
654 words += para.getWords();
655 }
656 chap.setParagraphs(paras);
657 chap.setWords(words);
658 }
659
660 return chap;
661
662 }
663
664 /**
665 * Convert the given content into {@link Paragraph}s.
666 *
667 * @param source
668 * the source URL of the story
669 * @param content
670 * the textual content
671 * @param pg
672 * the optional progress reporter
673 *
674 * @return the {@link Paragraph}s
675 *
676 * @throws IOException
677 * in case of I/O error
678 */
679 protected List<Paragraph> makeParagraphs(URL source, String content,
680 Progress pg) throws IOException {
681 if (pg == null) {
682 pg = new Progress();
683 }
684
685 if (isHtml()) {
686 // Special <HR> processing:
687 content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
688 "<br/>* * *<br/>");
689 }
690
691 List<Paragraph> paras = new ArrayList<Paragraph>();
692
693 if (content != null && !content.trim().isEmpty()) {
694 if (isHtml()) {
695 String[] tab = content.split("(<p>|</p>|<br>|<br/>)");
696 pg.setMinMax(0, tab.length);
697 int i = 1;
698 for (String line : tab) {
699 if (line.startsWith("[") && line.endsWith("]")) {
700 pg.setName("Extracting image " + i);
701 }
702 paras.add(makeParagraph(source, line.trim()));
703 pg.setProgress(i++);
704 }
705 pg.setName(null);
706 } else {
707 List<String> lines = new ArrayList<String>();
708 BufferedReader buff = null;
709 try {
710 buff = new BufferedReader(
711 new InputStreamReader(new ByteArrayInputStream(
712 content.getBytes("UTF-8")), "UTF-8"));
713 for (String line = buff.readLine(); line != null; line = buff
714 .readLine()) {
715 lines.add(line.trim());
716 }
717 } finally {
718 if (buff != null) {
719 buff.close();
720 }
721 }
722
723 pg.setMinMax(0, lines.size());
724 int i = 0;
725 for (String line : lines) {
726 if (line.startsWith("[") && line.endsWith("]")) {
727 pg.setName("Extracting image " + i);
728 }
729 paras.add(makeParagraph(source, line));
730 pg.setProgress(i++);
731 }
732 pg.setName(null);
733 }
734
735 // Check quotes for "bad" format
736 List<Paragraph> newParas = new ArrayList<Paragraph>();
737 for (Paragraph para : paras) {
738 newParas.addAll(requotify(para));
739 }
740 paras = newParas;
741
742 // Remove double blanks/brks
743 fixBlanksBreaks(paras);
744 }
745
746 return paras;
747 }
748
749 /**
750 * Convert the given line into a single {@link Paragraph}.
751 *
752 * @param source
753 * the source URL of the story
754 * @param line
755 * the textual content of the paragraph
756 *
757 * @return the {@link Paragraph}
758 */
759 private Paragraph makeParagraph(URL source, String line) {
760 URL image = null;
761 if (line.startsWith("[") && line.endsWith("]")) {
762 image = getImageUrl(this, source,
763 line.substring(1, line.length() - 1).trim());
764 }
765
766 if (image != null) {
767 return new Paragraph(image);
768 }
769
770 return processPara(line);
771 }
772
773 /**
774 * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
775 * those {@link Paragraph}s.
776 * <p>
777 * The resulting list will not contain a starting or trailing blank/break
778 * nor 2 blanks or breaks following each other.
779 *
780 * @param paras
781 * the list of {@link Paragraph}s to fix
782 */
783 protected void fixBlanksBreaks(List<Paragraph> paras) {
784 boolean space = false;
785 boolean brk = true;
786 for (int i = 0; i < paras.size(); i++) {
787 Paragraph para = paras.get(i);
788 boolean thisSpace = para.getType() == ParagraphType.BLANK;
789 boolean thisBrk = para.getType() == ParagraphType.BREAK;
790
791 if (i > 0 && space && thisBrk) {
792 paras.remove(i - 1);
793 i--;
794 } else if ((space || brk) && (thisSpace || thisBrk)) {
795 paras.remove(i);
796 i--;
797 }
798
799 space = thisSpace;
800 brk = thisBrk;
801 }
802
803 // Remove blank/brk at start
804 if (paras.size() > 0
805 && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
806 0).getType() == ParagraphType.BREAK)) {
807 paras.remove(0);
808 }
809
810 // Remove blank/brk at end
811 int last = paras.size() - 1;
812 if (paras.size() > 0
813 && (paras.get(last).getType() == ParagraphType.BLANK || paras
814 .get(last).getType() == ParagraphType.BREAK)) {
815 paras.remove(last);
816 }
817 }
818
819 /**
820 * Get the default cover related to this subject (see <tt>.info</tt> files).
821 *
822 * @param subject
823 * the subject
824 *
825 * @return the cover if any, or NULL
826 */
827 static BufferedImage getDefaultCover(String subject) {
828 if (subject != null && !subject.isEmpty()
829 && Instance.getCoverDir() != null) {
830 try {
831 File fileCover = new File(Instance.getCoverDir(), subject);
832 return getImage(null, fileCover.toURI().toURL(), subject);
833 } catch (MalformedURLException e) {
834 }
835 }
836
837 return null;
838 }
839
840 /**
841 * Return the list of supported image extensions.
842 *
843 * @param emptyAllowed
844 * TRUE to allow an empty extension on first place, which can be
845 * used when you may already have an extension in your input but
846 * are not sure about it
847 *
848 * @return the extensions
849 */
850 static String[] getImageExt(boolean emptyAllowed) {
851 if (emptyAllowed) {
852 return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
853 }
854
855 return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
856 }
857
858 /**
859 * Check if the given resource can be a local image or a remote image, then
860 * refresh the cache with it if it is.
861 *
862 * @param source
863 * the story source
864 * @param line
865 * the resource to check
866 *
867 * @return the image if found, or NULL
868 *
869 */
870 static BufferedImage getImage(BasicSupport support, URL source, String line) {
871 URL url = getImageUrl(support, source, line);
872 if (url != null) {
873 InputStream in = null;
874 try {
875 in = Instance.getCache().open(url, getSupport(url), true);
876 return ImageUtils.fromStream(in);
877 } catch (IOException e) {
878 } finally {
879 if (in != null) {
880 try {
881 in.close();
882 } catch (IOException e) {
883 }
884 }
885 }
886 }
887
888 return null;
889 }
890
891 /**
892 * Check if the given resource can be a local image or a remote image, then
893 * refresh the cache with it if it is.
894 *
895 * @param source
896 * the story source
897 * @param line
898 * the resource to check
899 *
900 * @return the image URL if found, or NULL
901 *
902 */
903 static URL getImageUrl(BasicSupport support, URL source, String line) {
904 URL url = null;
905
906 if (line != null) {
907 // try for files
908 if (source != null) {
909 try {
910
911 String relPath = null;
912 String absPath = null;
913 try {
914 String path = new File(source.getFile()).getParent();
915 relPath = new File(new File(path), line.trim())
916 .getAbsolutePath();
917 } catch (Exception e) {
918 // Cannot be converted to path (one possibility to take
919 // into account: absolute path on Windows)
920 }
921 try {
922 absPath = new File(line.trim()).getAbsolutePath();
923 } catch (Exception e) {
924 // Cannot be converted to path (at all)
925 }
926
927 for (String ext : getImageExt(true)) {
928 if (absPath != null && new File(absPath + ext).exists()) {
929 url = new File(absPath + ext).toURI().toURL();
930 } else if (relPath != null
931 && new File(relPath + ext).exists()) {
932 url = new File(relPath + ext).toURI().toURL();
933 }
934 }
935 } catch (Exception e) {
936 // Should not happen since we control the correct arguments
937 }
938 }
939
940 if (url == null) {
941 // try for URLs
942 try {
943 for (String ext : getImageExt(true)) {
944 if (Instance.getCache().check(new URL(line + ext))) {
945 url = new URL(line + ext);
946 break;
947 }
948 }
949
950 // try out of cache
951 if (url == null) {
952 for (String ext : getImageExt(true)) {
953 try {
954 url = new URL(line + ext);
955 Instance.getCache().refresh(url, support, true);
956 break;
957 } catch (IOException e) {
958 // no image with this ext
959 url = null;
960 }
961 }
962 }
963 } catch (MalformedURLException e) {
964 // Not an url
965 }
966 }
967
968 // refresh the cached file
969 if (url != null) {
970 try {
971 Instance.getCache().refresh(url, support, true);
972 } catch (IOException e) {
973 // woops, broken image
974 url = null;
975 }
976 }
977 }
978
979 return url;
980 }
981
982 /**
983 * Open the input file that will be used through the support.
984 * <p>
985 * Can return NULL, in which case you are supposed to work without an
986 * {@link InputStream}.
987 *
988 * @param source
989 * the source {@link URL}
990 *
991 * @return the {@link InputStream}
992 *
993 * @throws IOException
994 * in case of I/O error
995 */
996 protected InputStream openInput(URL source) throws IOException {
997 return Instance.getCache().open(source, this, false);
998 }
999
1000 /**
1001 * Reset then return {@link BasicSupport#in}.
1002 *
1003 * @return {@link BasicSupport#in}
1004 */
1005 protected InputStream getInput() {
1006 return reset(in);
1007 }
1008
1009 /**
1010 * Fix the author name if it is prefixed with some "by" {@link String}.
1011 *
1012 * @param author
1013 * the author with a possible prefix
1014 *
1015 * @return the author without prefixes
1016 */
1017 protected String fixAuthor(String author) {
1018 if (author != null) {
1019 for (String suffix : new String[] { " ", ":" }) {
1020 for (String byString : Instance.getConfig()
1021 .getString(Config.BYS).split(",")) {
1022 byString += suffix;
1023 if (author.toUpperCase().startsWith(byString.toUpperCase())) {
1024 author = author.substring(byString.length()).trim();
1025 }
1026 }
1027 }
1028
1029 // Special case (without suffix):
1030 if (author.startsWith("©")) {
1031 author = author.substring(1);
1032 }
1033 }
1034
1035 return author;
1036 }
1037
1038 /**
1039 * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
1040 * and requotify them (i.e., separate them into QUOTE paragraphs and other
1041 * paragraphs (quotes or not)).
1042 *
1043 * @param para
1044 * the paragraph to requotify (not necessarily a quote)
1045 *
1046 * @return the correctly (or so we hope) quotified paragraphs
1047 */
1048 protected List<Paragraph> requotify(Paragraph para) {
1049 List<Paragraph> newParas = new ArrayList<Paragraph>();
1050
1051 if (para.getType() == ParagraphType.QUOTE
1052 && para.getContent().length() > 2) {
1053 String line = para.getContent();
1054 boolean singleQ = line.startsWith("" + openQuote);
1055 boolean doubleQ = line.startsWith("" + openDoubleQuote);
1056
1057 // Do not try when more than one quote at a time
1058 // (some stories are not easily readable if we do)
1059 if (singleQ
1060 && line.indexOf(closeQuote, 1) < line
1061 .lastIndexOf(closeQuote)) {
1062 newParas.add(para);
1063 return newParas;
1064 }
1065 if (doubleQ
1066 && line.indexOf(closeDoubleQuote, 1) < line
1067 .lastIndexOf(closeDoubleQuote)) {
1068 newParas.add(para);
1069 return newParas;
1070 }
1071 //
1072
1073 if (!singleQ && !doubleQ) {
1074 line = openDoubleQuote + line + closeDoubleQuote;
1075 newParas.add(new Paragraph(ParagraphType.QUOTE, line, para
1076 .getWords()));
1077 } else {
1078 char open = singleQ ? openQuote : openDoubleQuote;
1079 char close = singleQ ? closeQuote : closeDoubleQuote;
1080
1081 int posDot = -1;
1082 boolean inQuote = false;
1083 int i = 0;
1084 for (char car : line.toCharArray()) {
1085 if (car == open) {
1086 inQuote = true;
1087 } else if (car == close) {
1088 inQuote = false;
1089 } else if (car == '.' && !inQuote) {
1090 posDot = i;
1091 break;
1092 }
1093 i++;
1094 }
1095
1096 if (posDot >= 0) {
1097 String rest = line.substring(posDot + 1).trim();
1098 line = line.substring(0, posDot + 1).trim();
1099 long words = 1;
1100 for (char car : line.toCharArray()) {
1101 if (car == ' ') {
1102 words++;
1103 }
1104 }
1105 newParas.add(new Paragraph(ParagraphType.QUOTE, line, words));
1106 if (!rest.isEmpty()) {
1107 newParas.addAll(requotify(processPara(rest)));
1108 }
1109 } else {
1110 newParas.add(para);
1111 }
1112 }
1113 } else {
1114 newParas.add(para);
1115 }
1116
1117 return newParas;
1118 }
1119
1120 /**
1121 * Process a {@link Paragraph} from a raw line of text.
1122 * <p>
1123 * Will also fix quotes and HTML encoding if needed.
1124 *
1125 * @param line
1126 * the raw line
1127 *
1128 * @return the processed {@link Paragraph}
1129 */
1130 protected Paragraph processPara(String line) {
1131 line = ifUnhtml(line).trim();
1132
1133 boolean space = true;
1134 boolean brk = true;
1135 boolean quote = false;
1136 boolean tentativeCloseQuote = false;
1137 char prev = '\0';
1138 int dashCount = 0;
1139 long words = 1;
1140
1141 StringBuilder builder = new StringBuilder();
1142 for (char car : line.toCharArray()) {
1143 if (car != '-') {
1144 if (dashCount > 0) {
1145 // dash, ndash and mdash: - – —
1146 // currently: always use mdash
1147 builder.append(dashCount == 1 ? '-' : '—');
1148 }
1149 dashCount = 0;
1150 }
1151
1152 if (tentativeCloseQuote) {
1153 tentativeCloseQuote = false;
1154 if (Character.isLetterOrDigit(car)) {
1155 builder.append("'");
1156 } else {
1157 // handle double-single quotes as double quotes
1158 if (prev == car) {
1159 builder.append(closeDoubleQuote);
1160 continue;
1161 }
1162
1163 builder.append(closeQuote);
1164 }
1165 }
1166
1167 switch (car) {
1168 case ' ': // note: unbreakable space
1169 case ' ':
1170 case '\t':
1171 case '\n': // just in case
1172 case '\r': // just in case
1173 if (builder.length() > 0
1174 && builder.charAt(builder.length() - 1) != ' ') {
1175 words++;
1176 }
1177 builder.append(' ');
1178 break;
1179
1180 case '\'':
1181 if (space || (brk && quote)) {
1182 quote = true;
1183 // handle double-single quotes as double quotes
1184 if (prev == car) {
1185 builder.deleteCharAt(builder.length() - 1);
1186 builder.append(openDoubleQuote);
1187 } else {
1188 builder.append(openQuote);
1189 }
1190 } else if (prev == ' ' || prev == car) {
1191 // handle double-single quotes as double quotes
1192 if (prev == car) {
1193 builder.deleteCharAt(builder.length() - 1);
1194 builder.append(openDoubleQuote);
1195 } else {
1196 builder.append(openQuote);
1197 }
1198 } else {
1199 // it is a quote ("I'm off") or a 'quote' ("This
1200 // 'good' restaurant"...)
1201 tentativeCloseQuote = true;
1202 }
1203 break;
1204
1205 case '"':
1206 if (space || (brk && quote)) {
1207 quote = true;
1208 builder.append(openDoubleQuote);
1209 } else if (prev == ' ') {
1210 builder.append(openDoubleQuote);
1211 } else {
1212 builder.append(closeDoubleQuote);
1213 }
1214 break;
1215
1216 case '-':
1217 if (space) {
1218 quote = true;
1219 } else {
1220 dashCount++;
1221 }
1222 space = false;
1223 break;
1224
1225 case '*':
1226 case '~':
1227 case '/':
1228 case '\\':
1229 case '<':
1230 case '>':
1231 case '=':
1232 case '+':
1233 case '_':
1234 case '–':
1235 case '—':
1236 space = false;
1237 builder.append(car);
1238 break;
1239
1240 case '‘':
1241 case '`':
1242 case '‹':
1243 case '﹁':
1244 case '〈':
1245 case '「':
1246 if (space || (brk && quote)) {
1247 quote = true;
1248 builder.append(openQuote);
1249 } else {
1250 // handle double-single quotes as double quotes
1251 if (prev == car) {
1252 builder.deleteCharAt(builder.length() - 1);
1253 builder.append(openDoubleQuote);
1254 } else {
1255 builder.append(openQuote);
1256 }
1257 }
1258 space = false;
1259 brk = false;
1260 break;
1261
1262 case '’':
1263 case '›':
1264 case '﹂':
1265 case '〉':
1266 case '」':
1267 space = false;
1268 brk = false;
1269 // handle double-single quotes as double quotes
1270 if (prev == car) {
1271 builder.deleteCharAt(builder.length() - 1);
1272 builder.append(closeDoubleQuote);
1273 } else {
1274 builder.append(closeQuote);
1275 }
1276 break;
1277
1278 case '«':
1279 case '“':
1280 case '﹃':
1281 case '《':
1282 case '『':
1283 if (space || (brk && quote)) {
1284 quote = true;
1285 builder.append(openDoubleQuote);
1286 } else {
1287 builder.append(openDoubleQuote);
1288 }
1289 space = false;
1290 brk = false;
1291 break;
1292
1293 case '»':
1294 case '”':
1295 case '﹄':
1296 case '》':
1297 case '』':
1298 space = false;
1299 brk = false;
1300 builder.append(closeDoubleQuote);
1301 break;
1302
1303 default:
1304 space = false;
1305 brk = false;
1306 builder.append(car);
1307 break;
1308 }
1309
1310 prev = car;
1311 }
1312
1313 if (tentativeCloseQuote) {
1314 tentativeCloseQuote = false;
1315 builder.append(closeQuote);
1316 }
1317
1318 line = builder.toString().trim();
1319
1320 ParagraphType type = ParagraphType.NORMAL;
1321 if (space) {
1322 type = ParagraphType.BLANK;
1323 } else if (brk) {
1324 type = ParagraphType.BREAK;
1325 } else if (quote) {
1326 type = ParagraphType.QUOTE;
1327 }
1328
1329 return new Paragraph(type, line, words);
1330 }
1331
1332 /**
1333 * Remove the HTML from the input <b>if</b> {@link BasicSupport#isHtml()} is
1334 * true.
1335 *
1336 * @param input
1337 * the input
1338 *
1339 * @return the no html version if needed
1340 */
1341 private String ifUnhtml(String input) {
1342 if (isHtml() && input != null) {
1343 return StringUtils.unhtml(input);
1344 }
1345
1346 return input;
1347 }
1348
1349 /**
1350 * Return a {@link BasicSupport} implementation supporting the given
1351 * resource if possible.
1352 *
1353 * @param url
1354 * the story resource
1355 *
1356 * @return an implementation that supports it, or NULL
1357 */
1358 public static BasicSupport getSupport(URL url) {
1359 if (url == null) {
1360 return null;
1361 }
1362
1363 // TEXT and INFO_TEXT always support files (not URLs though)
1364 for (SupportType type : SupportType.values()) {
1365 if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1366 BasicSupport support = getSupport(type);
1367 if (support != null && support.supports(url)) {
1368 return support;
1369 }
1370 }
1371 }
1372
1373 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
1374 SupportType.TEXT }) {
1375 BasicSupport support = getSupport(type);
1376 if (support != null && support.supports(url)) {
1377 return support;
1378 }
1379 }
1380
1381 return null;
1382 }
1383
1384 /**
1385 * Return a {@link BasicSupport} implementation supporting the given type.
1386 *
1387 * @param type
1388 * the type
1389 *
1390 * @return an implementation that supports it, or NULL
1391 */
1392 public static BasicSupport getSupport(SupportType type) {
1393 switch (type) {
1394 case EPUB:
1395 return new Epub().setType(type);
1396 case INFO_TEXT:
1397 return new InfoText().setType(type);
1398 case FIMFICTION:
1399 try {
1400 // Can fail if no client key or NO in options
1401 return new FimfictionApi().setType(type);
1402 } catch (IOException e) {
1403 return new Fimfiction().setType(type);
1404 }
1405 case FANFICTION:
1406 return new Fanfiction().setType(type);
1407 case TEXT:
1408 return new Text().setType(type);
1409 case MANGAFOX:
1410 return new MangaFox().setType(type);
1411 case E621:
1412 return new E621().setType(type);
1413 case YIFFSTAR:
1414 return new YiffStar().setType(type);
1415 case E_HENTAI:
1416 return new EHentai().setType(type);
1417 case CBZ:
1418 return new Cbz().setType(type);
1419 case HTML:
1420 return new Html().setType(type);
1421 }
1422
1423 return null;
1424 }
1425
1426 /**
1427 * Reset the given {@link InputStream} and return it.
1428 *
1429 * @param in
1430 * the {@link InputStream} to reset
1431 *
1432 * @return the same {@link InputStream} after reset
1433 */
1434 static protected InputStream reset(InputStream in) {
1435 try {
1436 if (in != null) {
1437 in.reset();
1438 }
1439 } catch (IOException e) {
1440 }
1441
1442 return in;
1443 }
1444
1445 /**
1446 * Return the first line from the given input which correspond to the given
1447 * selectors.
1448 *
1449 * @param in
1450 * the input
1451 * @param needle
1452 * a string that must be found inside the target line (also
1453 * supports "^" at start to say "only if it starts with" the
1454 * needle)
1455 * @param relativeLine
1456 * the line to return based upon the target line position (-1 =
1457 * the line before, 0 = the target line...)
1458 *
1459 * @return the line
1460 */
1461 static protected String getLine(InputStream in, String needle,
1462 int relativeLine) {
1463 return getLine(in, needle, relativeLine, true);
1464 }
1465
1466 /**
1467 * Return a line from the given input which correspond to the given
1468 * selectors.
1469 *
1470 * @param in
1471 * the input
1472 * @param needle
1473 * a string that must be found inside the target line (also
1474 * supports "^" at start to say "only if it starts with" the
1475 * needle)
1476 * @param relativeLine
1477 * the line to return based upon the target line position (-1 =
1478 * the line before, 0 = the target line...)
1479 * @param first
1480 * takes the first result (as opposed to the last one, which will
1481 * also always spend the input)
1482 *
1483 * @return the line
1484 */
1485 static protected String getLine(InputStream in, String needle,
1486 int relativeLine, boolean first) {
1487 String rep = null;
1488
1489 reset(in);
1490
1491 List<String> lines = new ArrayList<String>();
1492 @SuppressWarnings("resource")
1493 Scanner scan = new Scanner(in, "UTF-8");
1494 int index = -1;
1495 scan.useDelimiter("\\n");
1496 while (scan.hasNext()) {
1497 lines.add(scan.next());
1498
1499 if (index == -1) {
1500 if (needle.startsWith("^")) {
1501 if (lines.get(lines.size() - 1).startsWith(
1502 needle.substring(1))) {
1503 index = lines.size() - 1;
1504 }
1505
1506 } else {
1507 if (lines.get(lines.size() - 1).contains(needle)) {
1508 index = lines.size() - 1;
1509 }
1510 }
1511 }
1512
1513 if (index >= 0 && index + relativeLine < lines.size()) {
1514 rep = lines.get(index + relativeLine);
1515 if (first) {
1516 break;
1517 }
1518 }
1519 }
1520
1521 return rep;
1522 }
1523
1524 /**
1525 * Return the text between the key and the endKey (and optional subKey can
1526 * be passed, in this case we will look for the key first, then take the
1527 * text between the subKey and the endKey).
1528 * <p>
1529 * Will only match the first line with the given key if more than one are
1530 * possible. Which also means that if the subKey or endKey is not found on
1531 * that line, NULL will be returned.
1532 *
1533 * @param in
1534 * the input
1535 * @param key
1536 * the key to match (also supports "^" at start to say
1537 * "only if it starts with" the key)
1538 * @param subKey
1539 * the sub key or NULL if none
1540 * @param endKey
1541 * the end key or NULL for "up to the end"
1542 * @return the text or NULL if not found
1543 */
1544 static protected String getKeyLine(InputStream in, String key,
1545 String subKey, String endKey) {
1546 return getKeyText(getLine(in, key, 0), key, subKey, endKey);
1547 }
1548
1549 /**
1550 * Return the text between the key and the endKey (and optional subKey can
1551 * be passed, in this case we will look for the key first, then take the
1552 * text between the subKey and the endKey).
1553 *
1554 * @param in
1555 * the input
1556 * @param key
1557 * the key to match (also supports "^" at start to say
1558 * "only if it starts with" the key)
1559 * @param subKey
1560 * the sub key or NULL if none
1561 * @param endKey
1562 * the end key or NULL for "up to the end"
1563 * @return the text or NULL if not found
1564 */
1565 static protected String getKeyText(String in, String key, String subKey,
1566 String endKey) {
1567 String result = null;
1568
1569 String line = in;
1570 if (line != null && line.contains(key)) {
1571 line = line.substring(line.indexOf(key) + key.length());
1572 if (subKey == null || subKey.isEmpty() || line.contains(subKey)) {
1573 if (subKey != null) {
1574 line = line.substring(line.indexOf(subKey)
1575 + subKey.length());
1576 }
1577 if (endKey == null || line.contains(endKey)) {
1578 if (endKey != null) {
1579 line = line.substring(0, line.indexOf(endKey));
1580 result = line;
1581 }
1582 }
1583 }
1584 }
1585
1586 return result;
1587 }
1588
1589 /**
1590 * Return the text between the key and the endKey (optional subKeys can be
1591 * passed, in this case we will look for the subKeys first, then take the
1592 * text between the key and the endKey).
1593 *
1594 * @param in
1595 * the input
1596 * @param key
1597 * the key to match
1598 * @param endKey
1599 * the end key or NULL for "up to the end"
1600 * @param afters
1601 * the sub-keys to find before checking for key/endKey
1602 *
1603 * @return the text or NULL if not found
1604 */
1605 static protected String getKeyTextAfter(String in, String key,
1606 String endKey, String... afters) {
1607
1608 if (in != null && !in.isEmpty()) {
1609 int pos = indexOfAfter(in, 0, afters);
1610 if (pos < 0) {
1611 return null;
1612 }
1613
1614 in = in.substring(pos);
1615 }
1616
1617 return getKeyText(in, key, null, endKey);
1618 }
1619
1620 /**
1621 * Return the first index after all the given "afters" have been found in
1622 * the {@link String}, or -1 if it was not possible.
1623 *
1624 * @param in
1625 * the input
1626 * @param startAt
1627 * start at this position in the string
1628 * @param afters
1629 * the sub-keys to find before checking for key/endKey
1630 *
1631 * @return the text or NULL if not found
1632 */
1633 static protected int indexOfAfter(String in, int startAt, String... afters) {
1634 int pos = -1;
1635 if (in != null && !in.isEmpty()) {
1636 pos = startAt;
1637 if (afters != null) {
1638 for (int i = 0; pos >= 0 && i < afters.length; i++) {
1639 String subKey = afters[i];
1640 if (!subKey.isEmpty()) {
1641 pos = in.indexOf(subKey, pos);
1642 if (pos >= 0) {
1643 pos += subKey.length();
1644 }
1645 }
1646 }
1647 }
1648 }
1649
1650 return pos;
1651 }
1652 }