Main.java: new actions available
[fanfix.git] / src / be / nikiroo / fanfix / supported / BasicSupport.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.ByteArrayInputStream;
4 import java.io.File;
5 import java.io.IOException;
6 import java.io.InputStream;
7 import java.net.MalformedURLException;
8 import java.net.URL;
9 import java.nio.charset.StandardCharsets;
10 import java.util.ArrayList;
11 import java.util.HashMap;
12 import java.util.List;
13 import java.util.Map;
14 import java.util.Map.Entry;
15 import java.util.Scanner;
16
17 import be.nikiroo.fanfix.Instance;
18 import be.nikiroo.fanfix.bundles.Config;
19 import be.nikiroo.fanfix.bundles.StringId;
20 import be.nikiroo.fanfix.data.Chapter;
21 import be.nikiroo.fanfix.data.MetaData;
22 import be.nikiroo.fanfix.data.Paragraph;
23 import be.nikiroo.fanfix.data.Story;
24 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
25 import be.nikiroo.utils.StringUtils;
26
/**
 * This class is the base class used by the other support classes. It can be
 * used outside of this package, and has static methods that you can use to
 * get access to the correct support class.
 * <p>
 * It will be used with 'resources' (usually web pages or files).
 *
 * @author niki
 */
36 public abstract class BasicSupport {
37 /**
38 * The supported input types for which we can get a {@link BasicSupport}
39 * object.
40 *
41 * @author niki
42 */
43 public enum SupportType {
44 /** EPUB files created with this program */
45 EPUB,
46 /** Pure text file with some rules */
47 TEXT,
48 /** TEXT but with associated .info file */
49 INFO_TEXT,
50 /** My Little Pony fanfictions */
51 FIMFICTION,
52 /** Fanfictions from a lot of different universes */
53 FANFICTION,
54 /** Website with lots of Mangas */
55 MANGAFOX,
56 /** Furry website with comics support */
57 E621,
58 /** CBZ files */
59 CBZ;
60
61 /**
62 * A description of this support type (more information than the
63 * {@link BasicSupport#getSourceName()}).
64 *
65 * @return the description
66 */
67 public String getDesc() {
68 String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
69 this.name());
70
71 if (desc == null) {
72 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
73 }
74
75 return desc;
76 }
77
78 /**
79 * The name of this support type (a short version).
80 *
81 * @return the name
82 */
83 public String getSourceName() {
84 BasicSupport support = BasicSupport.getSupport(this);
85 if (support != null) {
86 return support.getSourceName();
87 }
88
89 return null;
90 }
91
92 @Override
93 public String toString() {
94 return super.toString().toLowerCase();
95 }
96
97 /**
98 * Call {@link SupportType#valueOf(String.toUpperCase())}.
99 *
100 * @param typeName
101 * the possible type name
102 *
103 * @return NULL or the type
104 */
105 public static SupportType valueOfUC(String typeName) {
106 return SupportType.valueOf(typeName == null ? null : typeName
107 .toUpperCase());
108 }
109
110 /**
111 * Call {@link SupportType#valueOf(String.toUpperCase())} but return
112 * NULL for NULL instead of raising exception.
113 *
114 * @param typeName
115 * the possible type name
116 *
117 * @return NULL or the type
118 */
119 public static SupportType valueOfNullOkUC(String typeName) {
120 if (typeName == null) {
121 return null;
122 }
123
124 return SupportType.valueOfUC(typeName);
125 }
126
127 /**
128 * Call {@link SupportType#valueOf(String.toUpperCase())} but return
129 * NULL in case of error instead of raising an exception.
130 *
131 * @param typeName
132 * the possible type name
133 *
134 * @return NULL or the type
135 */
136 public static SupportType valueOfAllOkUC(String typeName) {
137 try {
138 return SupportType.valueOfUC(typeName);
139 } catch (Exception e) {
140 return null;
141 }
142 }
143 }
144
145 /** Only used by {@link BasicSupport#getInput()} just so it is always reset. */
146 private InputStream in;
147 private SupportType type;
148 private URL currentReferer; // with on 'r', as in 'HTTP'...
149
150 // quote chars
151 private char openQuote = Instance.getTrans().getChar(
152 StringId.OPEN_SINGLE_QUOTE);
153 private char closeQuote = Instance.getTrans().getChar(
154 StringId.CLOSE_SINGLE_QUOTE);
155 private char openDoubleQuote = Instance.getTrans().getChar(
156 StringId.OPEN_DOUBLE_QUOTE);
157 private char closeDoubleQuote = Instance.getTrans().getChar(
158 StringId.CLOSE_DOUBLE_QUOTE);
159
/**
 * The name of this support class.
 * <p>
 * It is stored as the source of the processed stories and is also the
 * default publisher (see {@link BasicSupport#getPublisher(URL, InputStream)}).
 *
 * @return the name
 */
protected abstract String getSourceName();
166
/**
 * Check if the given resource is supported by this {@link BasicSupport}.
 * <p>
 * {@link BasicSupport#getSupport(URL)} relies on this to select the
 * implementation to use for a resource.
 *
 * @param url
 *            the resource to check for
 *
 * @return TRUE if it is
 */
protected abstract boolean supports(URL url);
176
/**
 * Return TRUE if the support will return HTML encoded content values for
 * the chapters content.
 * <p>
 * When TRUE, the values are passed through {@link StringUtils#unhtml} and
 * the &lt;hr&gt; tags of the chapters are replaced by a "* * *" line.
 *
 * @return TRUE for HTML
 */
protected abstract boolean isHtml();
184
/**
 * Return the story title.
 * <p>
 * The value may be HTML encoded if {@link BasicSupport#isHtml()} is TRUE.
 *
 * @param source
 *            the source of the story
 * @param in
 *            the input (the main resource)
 *
 * @return the title
 *
 * @throws IOException
 *             in case of I/O error
 */
protected abstract String getTitle(URL source, InputStream in)
        throws IOException;
200
/**
 * Return the story author.
 * <p>
 * The value may be HTML encoded if {@link BasicSupport#isHtml()} is TRUE,
 * and may still contain a "by" prefix: it is removed when the metadata are
 * processed.
 *
 * @param source
 *            the source of the story
 * @param in
 *            the input (the main resource)
 *
 * @return the author
 *
 * @throws IOException
 *             in case of I/O error
 */
protected abstract String getAuthor(URL source, InputStream in)
        throws IOException;
216
/**
 * Return the story publication date.
 * <p>
 * The value may be HTML encoded if {@link BasicSupport#isHtml()} is TRUE.
 *
 * @param source
 *            the source of the story
 * @param in
 *            the input (the main resource)
 *
 * @return the date
 *
 * @throws IOException
 *             in case of I/O error
 */
protected abstract String getDate(URL source, InputStream in)
        throws IOException;
232
/**
 * Return the subject of the story (for instance, if it is a fanfiction,
 * what is the original work; if it is a technical text, what is the
 * technical subject...).
 * <p>
 * When the story has no cover of its own, the subject is also used as the
 * name of the default cover to look for in the cover directory.
 *
 * @param source
 *            the source of the story
 * @param in
 *            the input (the main resource)
 *
 * @return the subject
 *
 * @throws IOException
 *             in case of I/O error
 */
protected abstract String getSubject(URL source, InputStream in)
        throws IOException;
250
/**
 * Return the story description.
 * <p>
 * It is converted into the resume {@link Chapter} (chapter number 0) of the
 * story; a NULL description results in a resume without paragraphs.
 *
 * @param source
 *            the source of the story
 * @param in
 *            the input (the main resource)
 *
 * @return the description
 *
 * @throws IOException
 *             in case of I/O error
 */
protected abstract String getDesc(URL source, InputStream in)
        throws IOException;
266
/**
 * Return the story cover resource if any, or NULL if none.
 * <p>
 * The default cover should not be checked for here: when NULL is returned,
 * {@link BasicSupport#process(URL)} looks for a default cover by itself.
 *
 * @param source
 *            the source of the story
 * @param in
 *            the input (the main resource)
 *
 * @return the cover or NULL
 *
 * @throws IOException
 *             in case of I/O error
 */
protected abstract URL getCover(URL source, InputStream in)
        throws IOException;
284
/**
 * Return the list of chapters (name and resource).
 * <p>
 * The chapters are numbered from 1, in the order of the list; NULL is
 * accepted and means "no chapters".
 *
 * @param source
 *            the source of the story
 * @param in
 *            the input (the main resource)
 *
 * @return the chapters
 *
 * @throws IOException
 *             in case of I/O error
 */
protected abstract List<Entry<String, URL>> getChapters(URL source,
        InputStream in) throws IOException;
300
/**
 * Return the content of the chapter (possibly HTML encoded, if
 * {@link BasicSupport#isHtml()} is TRUE).
 *
 * @param source
 *            the source of the story
 * @param in
 *            the input of the chapter (the resource of the chapter as
 *            returned by {@link BasicSupport#getChapters(URL, InputStream)},
 *            not the main resource)
 * @param number
 *            the chapter number (the first chapter is number 1)
 *
 * @return the content (NULL results in a chapter without paragraphs)
 *
 * @throws IOException
 *             in case of I/O error
 */
protected abstract String getChapterContent(URL source, InputStream in,
        int number) throws IOException;
319
320 /**
321 * Check if this {@link BasicSupport} is mainly catered to image files.
322 *
323 * @return TRUE if it is
324 */
325 public boolean isImageDocument(URL source, InputStream in)
326 throws IOException {
327 return false;
328 }
329
330 /**
331 * Return the list of cookies (values included) that must be used to
332 * correctly fetch the resources.
333 * <p>
334 * You are expected to call the super method implementation if you override
335 * it.
336 *
337 * @return the cookies
338 */
339 public Map<String, String> getCookies() {
340 return new HashMap<String, String>();
341 }
342
343 /**
344 * Process the given story resource into a partially filled {@link Story}
345 * object containing the name and metadata, except for the description.
346 *
347 * @param url
348 * the story resource
349 *
350 * @return the {@link Story}
351 *
352 * @throws IOException
353 * in case of I/O error
354 */
355 public Story processMeta(URL url) throws IOException {
356 return processMeta(url, true, false);
357 }
358
359 /**
360 * Process the given story resource into a partially filled {@link Story}
361 * object containing the name and metadata.
362 *
363 * @param url
364 * the story resource
365 *
366 * @param close
367 * close "this" and "in" when done
368 *
369 * @return the {@link Story}
370 *
371 * @throws IOException
372 * in case of I/O error
373 */
374 protected Story processMeta(URL url, boolean close, boolean getDesc)
375 throws IOException {
376 in = Instance.getCache().open(url, this, false);
377 if (in == null) {
378 return null;
379 }
380
381 try {
382 preprocess(getInput());
383
384 Story story = new Story();
385 story.setMeta(new MetaData());
386 story.getMeta().setTitle(ifUnhtml(getTitle(url, getInput())));
387 story.getMeta().setAuthor(
388 fixAuthor(ifUnhtml(getAuthor(url, getInput()))));
389 story.getMeta().setDate(ifUnhtml(getDate(url, getInput())));
390 story.getMeta().setTags(getTags(url, getInput()));
391 story.getMeta().setSource(getSourceName());
392 story.getMeta().setPublisher(
393 ifUnhtml(getPublisher(url, getInput())));
394 story.getMeta().setUuid(getUuid(url, getInput()));
395 story.getMeta().setLuid(getLuid(url, getInput()));
396 story.getMeta().setLang(getLang(url, getInput()));
397 story.getMeta().setSubject(ifUnhtml(getSubject(url, getInput())));
398 story.getMeta().setImageDocument(isImageDocument(url, getInput()));
399
400 if (getDesc) {
401 String descChapterName = Instance.getTrans().getString(
402 StringId.DESCRIPTION);
403 story.getMeta().setResume(
404 makeChapter(url, 0, descChapterName,
405 getDesc(url, getInput())));
406 }
407
408 return story;
409 } finally {
410 if (close) {
411 try {
412 close();
413 } catch (IOException e) {
414 Instance.syserr(e);
415 }
416
417 if (in != null) {
418 in.close();
419 }
420 }
421 }
422 }
423
424 /**
425 * Process the given story resource into a fully filled {@link Story}
426 * object.
427 *
428 * @param url
429 * the story resource
430 *
431 * @return the {@link Story}
432 *
433 * @throws IOException
434 * in case of I/O error
435 */
436 public Story process(URL url) throws IOException {
437 setCurrentReferer(url);
438
439 try {
440 Story story = processMeta(url, false, true);
441 if (story == null) {
442 return null;
443 }
444
445 story.setChapters(new ArrayList<Chapter>());
446
447 URL cover = getCover(url, getInput());
448 if (cover == null) {
449 String subject = story.getMeta() == null ? null : story
450 .getMeta().getSubject();
451 if (subject != null && !subject.isEmpty()
452 && Instance.getCoverDir() != null) {
453 File fileCover = new File(Instance.getCoverDir(), subject);
454 cover = getImage(fileCover.toURI().toURL(), subject);
455 }
456 }
457
458 if (cover != null) {
459 InputStream coverIn = null;
460 try {
461 coverIn = Instance.getCache().open(cover, this, true);
462 story.getMeta().setCover(StringUtils.toImage(coverIn));
463 } catch (IOException e) {
464 Instance.syserr(new IOException(Instance.getTrans()
465 .getString(StringId.ERR_BS_NO_COVER, cover), e));
466 } finally {
467 if (coverIn != null)
468 coverIn.close();
469 }
470 }
471
472 List<Entry<String, URL>> chapters = getChapters(url, getInput());
473 int i = 1;
474 if (chapters != null) {
475 for (Entry<String, URL> chap : chapters) {
476 setCurrentReferer(chap.getValue());
477 InputStream chapIn = Instance.getCache().open(
478 chap.getValue(), this, true);
479 try {
480 story.getChapters().add(
481 makeChapter(url, i, chap.getKey(),
482 getChapterContent(url, chapIn, i)));
483 } finally {
484 chapIn.close();
485 }
486 i++;
487 }
488 }
489
490 return story;
491
492 } finally {
493 try {
494 close();
495 } catch (IOException e) {
496 Instance.syserr(e);
497 }
498
499 if (in != null) {
500 in.close();
501 }
502
503 currentReferer = null;
504 }
505 }
506
507 /**
508 * The support type.$
509 *
510 * @return the type
511 */
512 public SupportType getType() {
513 return type;
514 }
515
516 /**
517 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
518 * the current {@link URL} we work on.
519 *
520 * @return the referer
521 */
522 public URL getCurrentReferer() {
523 return currentReferer;
524 }
525
526 /**
527 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
528 * the current {@link URL} we work on.
529 *
530 * @param currentReferer
531 * the new referer
532 */
533 protected void setCurrentReferer(URL currentReferer) {
534 this.currentReferer = currentReferer;
535 }
536
537 /**
538 * The support type.
539 *
540 * @param type
541 * the new type
542 *
543 * @return this
544 */
545 protected BasicSupport setType(SupportType type) {
546 this.type = type;
547 return this;
548 }
549
550 /**
551 * Return the story publisher (by default,
552 * {@link BasicSupport#getSourceName()}).
553 *
554 * @param source
555 * the source of the story
556 * @param in
557 * the input (the main resource)
558 *
559 * @return the publisher
560 *
561 * @throws IOException
562 * in case of I/O error
563 */
564 protected String getPublisher(URL source, InputStream in)
565 throws IOException {
566 return getSourceName();
567 }
568
569 /**
570 * Return the story UUID, a unique value representing the story (it is often
571 * an URL).
572 * <p>
573 * By default, this is the {@link URL} of the resource.
574 *
575 * @param source
576 * the source of the story
577 * @param in
578 * the input (the main resource)
579 *
580 * @return the uuid
581 *
582 * @throws IOException
583 * in case of I/O error
584 */
585 protected String getUuid(URL source, InputStream in) throws IOException {
586 return source.toString();
587 }
588
589 /**
590 * Return the story Library UID, a unique value representing the story (it
591 * is often a number) in the local library.
592 * <p>
593 * By default, this is empty.
594 *
595 * @param source
596 * the source of the story
597 * @param in
598 * the input (the main resource)
599 *
600 * @return the id
601 *
602 * @throws IOException
603 * in case of I/O error
604 */
605 protected String getLuid(URL source, InputStream in) throws IOException {
606 return "";
607 }
608
609 /**
610 * Return the 2-letter language code of this story.
611 * <p>
612 * By default, this is 'EN'.
613 *
614 * @param source
615 * the source of the story
616 * @param in
617 * the input (the main resource)
618 *
619 * @return the language
620 *
621 * @throws IOException
622 * in case of I/O error
623 */
624 protected String getLang(URL source, InputStream in) throws IOException {
625 return "EN";
626 }
627
628 /**
629 * Return the list of tags for this story.
630 *
631 * @param source
632 * the source of the story
633 * @param in
634 * the input (the main resource)
635 *
636 * @return the tags
637 *
638 * @throws IOException
639 * in case of I/O error
640 */
641 protected List<String> getTags(URL source, InputStream in)
642 throws IOException {
643 return new ArrayList<String>();
644 }
645
646 /**
647 * Return the first line from the given input which correspond to the given
648 * selectors.
649 * <p>
650 * Do not reset the input, which will be pointing at the line just after the
651 * result (input will be spent if no result is found).
652 *
653 * @param in
654 * the input
655 * @param needle
656 * a string that must be found inside the target line (also
657 * supports "^" at start to say "only if it starts with" the
658 * needle)
659 * @param relativeLine
660 * the line to return based upon the target line position (-1 =
661 * the line before, 0 = the target line...)
662 *
663 * @return the line
664 */
665 protected String getLine(InputStream in, String needle, int relativeLine) {
666 return getLine(in, needle, relativeLine, true);
667 }
668
669 /**
670 * Return a line from the given input which correspond to the given
671 * selectors.
672 * <p>
673 * Do not reset the input, which will be pointing at the line just after the
674 * result (input will be spent if no result is found) when first is TRUE,
675 * and will always be spent if first is FALSE.
676 *
677 * @param in
678 * the input
679 * @param needle
680 * a string that must be found inside the target line (also
681 * supports "^" at start to say "only if it starts with" the
682 * needle)
683 * @param relativeLine
684 * the line to return based upon the target line position (-1 =
685 * the line before, 0 = the target line...)
686 * @param first
687 * takes the first result (as opposed to the last one, which will
688 * also always spend the input)
689 *
690 * @return the line
691 */
692 protected String getLine(InputStream in, String needle, int relativeLine,
693 boolean first) {
694 String rep = null;
695
696 List<String> lines = new ArrayList<String>();
697 @SuppressWarnings("resource")
698 Scanner scan = new Scanner(in, "UTF-8");
699 int index = -1;
700 scan.useDelimiter("\\n");
701 while (scan.hasNext()) {
702 lines.add(scan.next());
703
704 if (index == -1) {
705 if (needle.startsWith("^")) {
706 if (lines.get(lines.size() - 1).startsWith(
707 needle.substring(1))) {
708 index = lines.size() - 1;
709 }
710
711 } else {
712 if (lines.get(lines.size() - 1).contains(needle)) {
713 index = lines.size() - 1;
714 }
715 }
716 }
717
718 if (index >= 0 && index + relativeLine < lines.size()) {
719 rep = lines.get(index + relativeLine);
720 if (first) {
721 break;
722 }
723 }
724 }
725
726 return rep;
727 }
728
729 /**
730 * Prepare the support if needed before processing.
731 *
732 * @throws IOException
733 * on I/O error
734 */
735 protected void preprocess(InputStream in) throws IOException {
736 }
737
738 /**
739 * Now that we have processed the {@link Story}, close the resources if any.
740 *
741 * @throws IOException
742 * on I/O error
743 */
744 protected void close() throws IOException {
745 }
746
747 /**
748 * Create a {@link Chapter} object from the given information, formatting
749 * the content as it should be.
750 *
751 * @param number
752 * the chapter number
753 * @param name
754 * the chapter name
755 * @param content
756 * the chapter content
757 *
758 * @return the {@link Chapter}
759 *
760 * @throws IOException
761 * in case of I/O error
762 */
763 protected Chapter makeChapter(URL source, int number, String name,
764 String content) throws IOException {
765
766 // Chapter name: process it correctly, then remove the possible
767 // redundant "Chapter x: " in front of it
768 String chapterName = processPara(name).getContent().trim();
769 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
770 .split(",")) {
771 String chapterWord = Instance.getConfig().getStringX(
772 Config.CHAPTER, lang);
773 if (chapterName.startsWith(chapterWord)) {
774 chapterName = chapterName.substring(chapterWord.length())
775 .trim();
776 break;
777 }
778 }
779
780 if (chapterName.startsWith(Integer.toString(number))) {
781 chapterName = chapterName.substring(
782 Integer.toString(number).length()).trim();
783 }
784
785 if (chapterName.startsWith(":")) {
786 chapterName = chapterName.substring(1).trim();
787 }
788 //
789
790 Chapter chap = new Chapter(number, chapterName);
791
792 if (content == null) {
793 return chap;
794 }
795
796 if (isHtml()) {
797 // Special <HR> processing:
798 content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
799 "\n* * *\n");
800 }
801
802 InputStream in = new ByteArrayInputStream(
803 content.getBytes(StandardCharsets.UTF_8));
804 try {
805 @SuppressWarnings("resource")
806 Scanner scan = new Scanner(in, "UTF-8");
807 scan.useDelimiter("(\\n|</p>)"); // \n for test, </p> for html
808
809 List<Paragraph> paras = new ArrayList<Paragraph>();
810 while (scan.hasNext()) {
811 String line = scan.next().trim();
812 boolean image = false;
813 if (line.startsWith("[") && line.endsWith("]")) {
814 URL url = getImage(source,
815 line.substring(1, line.length() - 1).trim());
816 if (url != null) {
817 paras.add(new Paragraph(url));
818 image = true;
819 }
820 }
821
822 if (!image) {
823 paras.add(processPara(line));
824 }
825 }
826
827 // Check quotes for "bad" format
828 List<Paragraph> newParas = new ArrayList<Paragraph>();
829 for (Paragraph para : paras) {
830 newParas.addAll(requotify(para));
831 }
832 paras = newParas;
833
834 // Remove double blanks/brks
835 boolean space = false;
836 boolean brk = true;
837 for (int i = 0; i < paras.size(); i++) {
838 Paragraph para = paras.get(i);
839 boolean thisSpace = para.getType() == ParagraphType.BLANK;
840 boolean thisBrk = para.getType() == ParagraphType.BREAK;
841
842 if (space && thisBrk) {
843 paras.remove(i - 1);
844 i--;
845 } else if ((space || brk) && (thisSpace || thisBrk)) {
846 paras.remove(i);
847 i--;
848 }
849
850 space = thisSpace;
851 brk = thisBrk;
852 }
853
854 // Remove blank/brk at start
855 if (paras.size() > 0
856 && (paras.get(0).getType() == ParagraphType.BLANK || paras
857 .get(0).getType() == ParagraphType.BREAK)) {
858 paras.remove(0);
859 }
860
861 // Remove blank/brk at end
862 int last = paras.size() - 1;
863 if (paras.size() > 0
864 && (paras.get(last).getType() == ParagraphType.BLANK || paras
865 .get(last).getType() == ParagraphType.BREAK)) {
866 paras.remove(last);
867 }
868
869 chap.setParagraphs(paras);
870
871 return chap;
872 } finally {
873 in.close();
874 }
875 }
876
877 /**
878 * Return the list of supported image extensions.
879 *
880 * @return the extensions
881 */
882 protected String[] getImageExt(boolean emptyAllowed) {
883 if (emptyAllowed) {
884 return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
885 } else {
886 return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
887 }
888 }
889
890 /**
891 * Check if the given resource can be a local image or a remote image, then
892 * refresh the cache with it if it is.
893 *
894 * @param source
895 * the story source
896 * @param line
897 * the resource to check
898 *
899 * @return the image URL if found, or NULL
900 *
901 */
902 protected URL getImage(URL source, String line) {
903 String path = new File(source.getFile()).getParent();
904 URL url = null;
905
906 // try for files
907 try {
908 String urlBase = new File(new File(path), line.trim()).toURI()
909 .toURL().toString();
910 for (String ext : getImageExt(true)) {
911 if (new File(urlBase + ext).exists()) {
912 url = new File(urlBase + ext).toURI().toURL();
913 }
914 }
915 } catch (Exception e) {
916 // Nothing to do here
917 }
918
919 if (url == null) {
920 // try for URLs
921 try {
922 for (String ext : getImageExt(true)) {
923 if (Instance.getCache().check(new URL(line + ext))) {
924 url = new URL(line + ext);
925 }
926 }
927
928 // try out of cache
929 if (url == null) {
930 for (String ext : getImageExt(true)) {
931 try {
932 url = new URL(line + ext);
933 Instance.getCache().refresh(url, this, true);
934 break;
935 } catch (IOException e) {
936 // no image with this ext
937 url = null;
938 }
939 }
940 }
941 } catch (MalformedURLException e) {
942 // Not an url
943 }
944 }
945
946 // refresh the cached file
947 if (url != null) {
948 try {
949 Instance.getCache().refresh(url, this, true);
950 } catch (IOException e) {
951 // woops, broken image
952 url = null;
953 }
954 }
955
956 return url;
957 }
958
959 /**
960 * Reset then return {@link BasicSupport#in}.
961 *
962 * @return {@link BasicSupport#in}
963 *
964 * @throws IOException
965 * in case of I/O error
966 */
967 protected InputStream getInput() throws IOException {
968 in.reset();
969 return in;
970 }
971
972 /**
973 * Fix the author name if it is prefixed with some "by" {@link String}.
974 *
975 * @param author
976 * the author with a possible prefix
977 *
978 * @return the author without prefixes
979 */
980 private String fixAuthor(String author) {
981 if (author != null) {
982 for (String suffix : new String[] { " ", ":" }) {
983 for (String byString : Instance.getConfig()
984 .getString(Config.BYS).split(",")) {
985 byString += suffix;
986 if (author.toUpperCase().startsWith(byString.toUpperCase())) {
987 author = author.substring(byString.length()).trim();
988 }
989 }
990 }
991
992 // Special case (without suffix):
993 if (author.startsWith("©")) {
994 author = author.substring(1);
995 }
996 }
997
998 return author;
999 }
1000
1001 /**
1002 * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
1003 * and requotify them (i.e., separate them into QUOTE paragraphs and other
1004 * paragraphs (quotes or not)).
1005 *
1006 * @param para
1007 * the paragraph to requotify (not necessaraly a quote)
1008 *
1009 * @return the correctly (or so we hope) quotified paragraphs
1010 */
1011 private List<Paragraph> requotify(Paragraph para) {
1012 List<Paragraph> newParas = new ArrayList<Paragraph>();
1013
1014 if (para.getType() == ParagraphType.QUOTE) {
1015 String line = para.getContent();
1016 boolean singleQ = line.startsWith("" + openQuote);
1017 boolean doubleQ = line.startsWith("" + openDoubleQuote);
1018
1019 if (!singleQ && !doubleQ) {
1020 line = openDoubleQuote + line + closeDoubleQuote;
1021 newParas.add(new Paragraph(ParagraphType.QUOTE, line));
1022 } else {
1023 char close = singleQ ? closeQuote : closeDoubleQuote;
1024 int posClose = line.indexOf(close);
1025 int posDot = line.indexOf(".");
1026 while (posDot >= 0 && posDot < posClose) {
1027 posDot = line.indexOf(".", posDot + 1);
1028 }
1029
1030 if (posDot >= 0) {
1031 String rest = line.substring(posDot + 1).trim();
1032 line = line.substring(0, posDot + 1).trim();
1033 newParas.add(new Paragraph(ParagraphType.QUOTE, line));
1034 newParas.addAll(requotify(processPara(rest)));
1035 } else {
1036 newParas.add(para);
1037 }
1038 }
1039 } else {
1040 newParas.add(para);
1041 }
1042
1043 return newParas;
1044 }
1045
1046 /**
1047 * Process a {@link Paragraph} from a raw line of text.
1048 * <p>
1049 * Will also fix quotes and HTML encoding if needed.
1050 *
1051 * @param line
1052 * the raw line
1053 *
1054 * @return the processed {@link Paragraph}
1055 */
1056 private Paragraph processPara(String line) {
1057 line = ifUnhtml(line).trim();
1058
1059 boolean space = true;
1060 boolean brk = true;
1061 boolean quote = false;
1062 boolean tentativeCloseQuote = false;
1063 char prev = '\0';
1064 int dashCount = 0;
1065
1066 StringBuilder builder = new StringBuilder();
1067 for (char car : line.toCharArray()) {
1068 if (car != '-') {
1069 if (dashCount > 0) {
1070 // dash, ndash and mdash: - – —
1071 // currently: always use mdash
1072 builder.append(dashCount == 1 ? '-' : '—');
1073 }
1074 dashCount = 0;
1075 }
1076
1077 if (tentativeCloseQuote) {
1078 tentativeCloseQuote = false;
1079 if ((car >= 'a' && car <= 'z') || (car >= 'A' && car <= 'Z')
1080 || (car >= '0' && car <= '9')) {
1081 builder.append("'");
1082 } else {
1083 builder.append(closeQuote);
1084 }
1085 }
1086
1087 switch (car) {
1088 case ' ': // note: unbreakable space
1089 case ' ':
1090 case '\t':
1091 case '\n': // just in case
1092 case '\r': // just in case
1093 builder.append(' ');
1094 break;
1095
1096 case '\'':
1097 if (space || (brk && quote)) {
1098 quote = true;
1099 builder.append(openQuote);
1100 } else if (prev == ' ') {
1101 builder.append(openQuote);
1102 } else {
1103 // it is a quote ("I'm off") or a 'quote' ("This
1104 // 'good' restaurant"...)
1105 tentativeCloseQuote = true;
1106 }
1107 break;
1108
1109 case '"':
1110 if (space || (brk && quote)) {
1111 quote = true;
1112 builder.append(openDoubleQuote);
1113 } else if (prev == ' ') {
1114 builder.append(openDoubleQuote);
1115 } else {
1116 builder.append(closeDoubleQuote);
1117 }
1118 break;
1119
1120 case '-':
1121 if (space) {
1122 quote = true;
1123 } else {
1124 dashCount++;
1125 }
1126 space = false;
1127 break;
1128
1129 case '*':
1130 case '~':
1131 case '/':
1132 case '\\':
1133 case '<':
1134 case '>':
1135 case '=':
1136 case '+':
1137 case '_':
1138 case '–':
1139 case '—':
1140 space = false;
1141 builder.append(car);
1142 break;
1143
1144 case '‘':
1145 case '`':
1146 case '‹':
1147 case '﹁':
1148 case '〈':
1149 case '「':
1150 if (space || (brk && quote)) {
1151 quote = true;
1152 builder.append(openQuote);
1153 } else {
1154 builder.append(openQuote);
1155 }
1156 space = false;
1157 brk = false;
1158 break;
1159
1160 case '’':
1161 case '›':
1162 case '﹂':
1163 case '〉':
1164 case '」':
1165 space = false;
1166 brk = false;
1167 builder.append(closeQuote);
1168 break;
1169
1170 case '«':
1171 case '“':
1172 case '﹃':
1173 case '《':
1174 case '『':
1175 if (space || (brk && quote)) {
1176 quote = true;
1177 builder.append(openDoubleQuote);
1178 } else {
1179 builder.append(openDoubleQuote);
1180 }
1181 space = false;
1182 brk = false;
1183 break;
1184
1185 case '»':
1186 case '”':
1187 case '﹄':
1188 case '》':
1189 case '』':
1190 space = false;
1191 brk = false;
1192 builder.append(closeDoubleQuote);
1193 break;
1194
1195 default:
1196 space = false;
1197 brk = false;
1198 builder.append(car);
1199 break;
1200 }
1201
1202 prev = car;
1203 }
1204
1205 if (tentativeCloseQuote) {
1206 tentativeCloseQuote = false;
1207 builder.append(closeQuote);
1208 }
1209
1210 line = builder.toString().trim();
1211
1212 ParagraphType type = ParagraphType.NORMAL;
1213 if (space) {
1214 type = ParagraphType.BLANK;
1215 } else if (brk) {
1216 type = ParagraphType.BREAK;
1217 } else if (quote) {
1218 type = ParagraphType.QUOTE;
1219 }
1220
1221 return new Paragraph(type, line);
1222 }
1223
1224 /**
1225 * Remove the HTML from the inpit <b>if</b> {@link BasicSupport#isHtml()} is
1226 * true.
1227 *
1228 * @param input
1229 * the input
1230 *
1231 * @return the no html version if needed
1232 */
1233 private String ifUnhtml(String input) {
1234 if (isHtml() && input != null) {
1235 return StringUtils.unhtml(input);
1236 }
1237
1238 return input;
1239 }
1240
1241 /**
1242 * Return a {@link BasicSupport} implementation supporting the given
1243 * resource if possible.
1244 *
1245 * @param url
1246 * the story resource
1247 *
1248 * @return an implementation that supports it, or NULL
1249 */
1250 public static BasicSupport getSupport(URL url) {
1251 if (url == null) {
1252 return null;
1253 }
1254
1255 // TEXT and INFO_TEXT always support files (not URLs though)
1256 for (SupportType type : SupportType.values()) {
1257 if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1258 BasicSupport support = getSupport(type);
1259 if (support != null && support.supports(url)) {
1260 return support;
1261 }
1262 }
1263 }
1264
1265 for (SupportType type : new SupportType[] { SupportType.TEXT,
1266 SupportType.INFO_TEXT }) {
1267 BasicSupport support = getSupport(type);
1268 if (support != null && support.supports(url)) {
1269 return support;
1270 }
1271 }
1272
1273 return null;
1274 }
1275
1276 /**
1277 * Return a {@link BasicSupport} implementation supporting the given type.
1278 *
1279 * @param type
1280 * the type
1281 *
1282 * @return an implementation that supports it, or NULL
1283 */
1284 public static BasicSupport getSupport(SupportType type) {
1285 switch (type) {
1286 case EPUB:
1287 return new Epub().setType(type);
1288 case INFO_TEXT:
1289 return new InfoText().setType(type);
1290 case FIMFICTION:
1291 return new Fimfiction().setType(type);
1292 case FANFICTION:
1293 return new Fanfiction().setType(type);
1294 case TEXT:
1295 return new Text().setType(type);
1296 case MANGAFOX:
1297 return new MangaFox().setType(type);
1298 case E621:
1299 return new E621().setType(type);
1300 case CBZ:
1301 return new Cbz().setType(type);
1302 }
1303
1304 return null;
1305 }
1306 }