Library fixes + "make install" fix
[fanfix.git] / src / be / nikiroo / fanfix / supported / BasicSupport.java
... / ...
CommitLineData
1package be.nikiroo.fanfix.supported;
2
3import java.io.ByteArrayInputStream;
4import java.io.File;
5import java.io.IOException;
6import java.io.InputStream;
7import java.net.MalformedURLException;
8import java.net.URL;
9import java.util.ArrayList;
10import java.util.HashMap;
11import java.util.List;
12import java.util.Map;
13import java.util.Map.Entry;
14import java.util.Scanner;
15
16import be.nikiroo.fanfix.Instance;
17import be.nikiroo.fanfix.bundles.Config;
18import be.nikiroo.fanfix.bundles.StringId;
19import be.nikiroo.fanfix.data.Chapter;
20import be.nikiroo.fanfix.data.MetaData;
21import be.nikiroo.fanfix.data.Paragraph;
22import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
23import be.nikiroo.fanfix.data.Story;
24import be.nikiroo.utils.StringUtils;
25
26/**
27 * This class is the base class used by the other support classes. It can be
28 * used outside of this package, and have static method that you can use to get
29 * access to the correct support class.
30 * <p>
31 * It will be used with 'resources' (usually web pages or files).
32 *
33 * @author niki
34 */
35public abstract class BasicSupport {
36 /**
37 * The supported input types for which we can get a {@link BasicSupport}
38 * object.
39 *
40 * @author niki
41 */
42 public enum SupportType {
43 /** EPUB files created with this program */
44 EPUB,
45 /** Pure text file with some rules */
46 TEXT,
47 /** TEXT but with associated .info file */
48 INFO_TEXT,
49 /** My Little Pony fanfictions */
50 FIMFICTION,
51 /** Fanfictions from a lot of different universes */
52 FANFICTION,
53 /** Website with lots of Mangas */
54 MANGAFOX,
55 /** Furry website with comics support */
56 E621,
57 /** CBZ files */
58 CBZ;
59
60 /**
61 * A description of this support type (more information than the
62 * {@link BasicSupport#getSourceName()}).
63 *
64 * @return the description
65 */
66 public String getDesc() {
67 String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
68 this.name());
69
70 if (desc == null) {
71 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
72 }
73
74 return desc;
75 }
76
77 /**
78 * The name of this support type (a short version).
79 *
80 * @return the name
81 */
82 public String getSourceName() {
83 BasicSupport support = BasicSupport.getSupport(this);
84 if (support != null) {
85 return support.getSourceName();
86 }
87
88 return null;
89 }
90
91 @Override
92 public String toString() {
93 return super.toString().toLowerCase();
94 }
95
96 /**
97 * Call {@link SupportType#valueOf(String.toUpperCase())}.
98 *
99 * @param typeName
100 * the possible type name
101 *
102 * @return NULL or the type
103 */
104 public static SupportType valueOfUC(String typeName) {
105 return SupportType.valueOf(typeName == null ? null : typeName
106 .toUpperCase());
107 }
108
109 /**
110 * Call {@link SupportType#valueOf(String.toUpperCase())} but return
111 * NULL for NULL instead of raising exception.
112 *
113 * @param typeName
114 * the possible type name
115 *
116 * @return NULL or the type
117 */
118 public static SupportType valueOfNullOkUC(String typeName) {
119 if (typeName == null) {
120 return null;
121 }
122
123 return SupportType.valueOfUC(typeName);
124 }
125
126 /**
127 * Call {@link SupportType#valueOf(String.toUpperCase())} but return
128 * NULL in case of error instead of raising an exception.
129 *
130 * @param typeName
131 * the possible type name
132 *
133 * @return NULL or the type
134 */
135 public static SupportType valueOfAllOkUC(String typeName) {
136 try {
137 return SupportType.valueOfUC(typeName);
138 } catch (Exception e) {
139 return null;
140 }
141 }
142 }
143
144 /** The main resource; read it via {@link BasicSupport#getInput()} so it is always reset. */
145 private InputStream in;
146 private SupportType type;
147 private URL currentReferer; // with one 'r', as in 'HTTP'...
148
149 // typographic quote chars, read from the translation bundle
150 private char openQuote = Instance.getTrans().getChar(
151 StringId.OPEN_SINGLE_QUOTE);
152 private char closeQuote = Instance.getTrans().getChar(
153 StringId.CLOSE_SINGLE_QUOTE);
154 private char openDoubleQuote = Instance.getTrans().getChar(
155 StringId.OPEN_DOUBLE_QUOTE);
156 private char closeDoubleQuote = Instance.getTrans().getChar(
157 StringId.CLOSE_DOUBLE_QUOTE);
158
159 /**
160 * The name of this support class.
 * <p>
 * It is stored as the source of the story metadata and is the default
 * publisher (see {@link BasicSupport#getPublisher(URL, InputStream)}).
161 *
162 * @return the name
163 */
164 protected abstract String getSourceName();
165
166 /**
167 * Check if the given resource is supported by this {@link BasicSupport}.
 * <p>
 * {@link BasicSupport#getSupport(URL)} relies on it to select the
 * implementation to return.
168 *
169 * @param url
170 * the resource to check for
171 *
172 * @return TRUE if it is
173 */
174 protected abstract boolean supports(URL url);
175
176 /**
177 * Return TRUE if the support will return HTML encoded content values for
178 * the chapters content.
 * <p>
 * When TRUE, the metadata strings and the paragraphs are HTML-decoded
 * and the HR tags of the chapters are converted into separator lines.
179 *
180 * @return TRUE for HTML
181 */
182 protected abstract boolean isHtml();
183
184 /**
185 * Return the story title.
 * <p>
 * The value is HTML-decoded by the caller when
 * {@link BasicSupport#isHtml()} is TRUE.
186 *
187 * @param source
188 * the source of the story
189 * @param in
190 * the input (the main resource)
191 *
192 * @return the title
193 *
194 * @throws IOException
195 * in case of I/O error
196 */
197 protected abstract String getTitle(URL source, InputStream in)
198 throws IOException;
199
200 /**
201 * Return the story author.
 * <p>
 * The value is HTML-decoded by the caller when
 * {@link BasicSupport#isHtml()} is TRUE, then stripped of a possible
 * "by" prefix.
202 *
203 * @param source
204 * the source of the story
205 * @param in
206 * the input (the main resource)
207 *
208 * @return the author
209 *
210 * @throws IOException
211 * in case of I/O error
212 */
213 protected abstract String getAuthor(URL source, InputStream in)
214 throws IOException;
215
216 /**
217 * Return the story publication date.
 * <p>
 * The value is HTML-decoded by the caller when
 * {@link BasicSupport#isHtml()} is TRUE.
218 *
219 * @param source
220 * the source of the story
221 * @param in
222 * the input (the main resource)
223 *
224 * @return the date
225 *
226 * @throws IOException
227 * in case of I/O error
228 */
229 protected abstract String getDate(URL source, InputStream in)
230 throws IOException;
231
232 /**
233 * Return the subject of the story (for instance, if it is a fanfiction,
234 * what is the original work; if it is a technical text, what is the
235 * technical subject...).
 * <p>
 * The subject is also used as the name of the fallback cover image
 * looked up in the cover directory when the story has no cover.
236 *
237 * @param source
238 * the source of the story
239 * @param in
240 * the input (the main resource)
241 *
242 * @return the subject
243 *
244 * @throws IOException
245 * in case of I/O error
246 */
247 protected abstract String getSubject(URL source, InputStream in)
248 throws IOException;
249
250 /**
251 * Return the story description.
 * <p>
 * It is only requested when the description is wanted, and is then
 * converted into a chapter with the number 0 (the resume of the story).
252 *
253 * @param source
254 * the source of the story
255 * @param in
256 * the input (the main resource)
257 *
258 * @return the description
259 *
260 * @throws IOException
261 * in case of I/O error
262 */
263 protected abstract String getDesc(URL source, InputStream in)
264 throws IOException;
265
266 /**
267 * Return the story cover resource if any, or NULL if none.
268 * <p>
269 * The default cover should not be checked for here.
 * (When NULL is returned, the caller looks by itself for an image named
 * after the subject of the story in the cover directory.)
270 *
271 * @param source
272 * the source of the story
273 * @param in
274 * the input (the main resource)
275 *
276 * @return the cover or NULL
277 *
278 * @throws IOException
279 * in case of I/O error
280 */
281 protected abstract URL getCover(URL source, InputStream in)
282 throws IOException;
283
284 /**
285 * Return the list of chapters (name and resource).
 * <p>
 * The chapters are numbered from 1, in the order of the list.
286 *
287 * @param source
288 * the source of the story
289 * @param in
290 * the input (the main resource)
291 *
292 * @return the chapters (NULL is handled as "no chapters")
293 *
294 * @throws IOException
295 * in case of I/O error
296 */
297 protected abstract List<Entry<String, URL>> getChapters(URL source,
298 InputStream in) throws IOException;
299
300 /**
301 * Return the content of the chapter (possibly HTML encoded, if
302 * {@link BasicSupport#isHtml()} is TRUE).
303 *
304 * @param source
305 * the source of the story
306 * @param in
307 * the input (the resource of this chapter, not the main resource)
308 * @param number
309 * the chapter number (the first chapter is number 1)
310 *
311 * @return the content
312 *
313 * @throws IOException
314 * in case of I/O error
315 */
316 protected abstract String getChapterContent(URL source, InputStream in,
317 int number) throws IOException;
318
319 /**
320 * Check if this {@link BasicSupport} is mainly catered to image files.
321 *
322 * @return TRUE if it is
323 */
324 public boolean isImageDocument(URL source, InputStream in)
325 throws IOException {
326 return false;
327 }
328
329 /**
330 * Return the list of cookies (values included) that must be used to
331 * correctly fetch the resources.
332 * <p>
333 * You are expected to call the super method implementation if you override
334 * it.
335 *
336 * @return the cookies
337 */
338 public Map<String, String> getCookies() {
339 return new HashMap<String, String>();
340 }
341
342 /**
343 * Process the given story resource into a partially filled {@link Story}
344 * object containing the name and metadata, except for the description.
345 *
346 * @param url
347 * the story resource
348 *
349 * @return the {@link Story}
350 *
351 * @throws IOException
352 * in case of I/O error
353 */
354 public Story processMeta(URL url) throws IOException {
355 return processMeta(url, true, false);
356 }
357
358 /**
359 * Process the given story resource into a partially filled {@link Story}
360 * object containing the name and metadata.
361 *
362 * @param url
363 * the story resource
364 *
365 * @param close
366 * close "this" and "in" when done
367 *
368 * @return the {@link Story}
369 *
370 * @throws IOException
371 * in case of I/O error
372 */
373 protected Story processMeta(URL url, boolean close, boolean getDesc)
374 throws IOException {
375 in = Instance.getCache().open(url, this, false);
376 if (in == null) {
377 return null;
378 }
379
380 try {
381 preprocess(getInput());
382
383 Story story = new Story();
384 story.setMeta(new MetaData());
385 story.getMeta().setTitle(ifUnhtml(getTitle(url, getInput())));
386 story.getMeta().setAuthor(
387 fixAuthor(ifUnhtml(getAuthor(url, getInput()))));
388 story.getMeta().setDate(ifUnhtml(getDate(url, getInput())));
389 story.getMeta().setTags(getTags(url, getInput()));
390 story.getMeta().setSource(getSourceName());
391 story.getMeta().setPublisher(
392 ifUnhtml(getPublisher(url, getInput())));
393 story.getMeta().setUuid(getUuid(url, getInput()));
394 story.getMeta().setLuid(getLuid(url, getInput()));
395 story.getMeta().setLang(getLang(url, getInput()));
396 story.getMeta().setSubject(ifUnhtml(getSubject(url, getInput())));
397 story.getMeta().setImageDocument(isImageDocument(url, getInput()));
398
399 if (getDesc) {
400 String descChapterName = Instance.getTrans().getString(
401 StringId.DESCRIPTION);
402 story.getMeta().setResume(
403 makeChapter(url, 0, descChapterName,
404 getDesc(url, getInput())));
405 }
406
407 return story;
408 } finally {
409 if (close) {
410 try {
411 close();
412 } catch (IOException e) {
413 Instance.syserr(e);
414 }
415
416 if (in != null) {
417 in.close();
418 }
419 }
420 }
421 }
422
423 /**
424 * Process the given story resource into a fully filled {@link Story}
425 * object.
426 *
427 * @param url
428 * the story resource
429 *
430 * @return the {@link Story}
431 *
432 * @throws IOException
433 * in case of I/O error
434 */
435 public Story process(URL url) throws IOException {
436 setCurrentReferer(url);
437
438 try {
439 Story story = processMeta(url, false, true);
440 if (story == null) {
441 return null;
442 }
443
444 story.setChapters(new ArrayList<Chapter>());
445
446 URL cover = getCover(url, getInput());
447 if (cover == null) {
448 String subject = story.getMeta() == null ? null : story
449 .getMeta().getSubject();
450 if (subject != null && !subject.isEmpty()
451 && Instance.getCoverDir() != null) {
452 File fileCover = new File(Instance.getCoverDir(), subject);
453 cover = getImage(fileCover.toURI().toURL(), subject);
454 }
455 }
456
457 if (cover != null) {
458 InputStream coverIn = null;
459 try {
460 coverIn = Instance.getCache().open(cover, this, true);
461 story.getMeta().setCover(StringUtils.toImage(coverIn));
462 } catch (IOException e) {
463 Instance.syserr(new IOException(Instance.getTrans()
464 .getString(StringId.ERR_BS_NO_COVER, cover), e));
465 } finally {
466 if (coverIn != null)
467 coverIn.close();
468 }
469 }
470
471 List<Entry<String, URL>> chapters = getChapters(url, getInput());
472 int i = 1;
473 if (chapters != null) {
474 for (Entry<String, URL> chap : chapters) {
475 setCurrentReferer(chap.getValue());
476 InputStream chapIn = Instance.getCache().open(
477 chap.getValue(), this, true);
478 try {
479 story.getChapters().add(
480 makeChapter(url, i, chap.getKey(),
481 getChapterContent(url, chapIn, i)));
482 } finally {
483 chapIn.close();
484 }
485 i++;
486 }
487 }
488
489 return story;
490
491 } finally {
492 try {
493 close();
494 } catch (IOException e) {
495 Instance.syserr(e);
496 }
497
498 if (in != null) {
499 in.close();
500 }
501
502 currentReferer = null;
503 }
504 }
505
506 /**
507 * The support type.
508 *
509 * @return the type (as given to {@link BasicSupport#setType(SupportType)})
510 */
511 public SupportType getType() {
512 return type;
513 }
514
515 /**
516 * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
517 * the current {@link URL} we work on.
518 *
519 * @return the referer (NULL when no story is being processed)
520 */
521 public URL getCurrentReferer() {
522 return currentReferer;
523 }
524
525 /**
526 * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
527 * the current {@link URL} we work on.
528 *
529 * @param currentReferer
530 * the new referer
531 */
532 protected void setCurrentReferer(URL currentReferer) {
533 this.currentReferer = currentReferer;
534 }
535
536 /**
537 * The support type.
538 *
539 * @param type
540 * the new type
541 *
542 * @return this (so the call can be chained after the constructor)
543 */
544 protected BasicSupport setType(SupportType type) {
545 this.type = type;
546 return this;
547 }
548
549 /**
550 * Return the story publisher (by default,
551 * {@link BasicSupport#getSourceName()}).
552 *
553 * @param source
554 * the source of the story
555 * @param in
556 * the input (the main resource)
557 *
558 * @return the publisher
559 *
560 * @throws IOException
561 * in case of I/O error
562 */
563 protected String getPublisher(URL source, InputStream in)
564 throws IOException {
565 return getSourceName();
566 }
567
568 /**
569 * Return the story UUID, a unique value representing the story (it is often
570 * an URL).
571 * <p>
572 * By default, this is the {@link URL} of the resource.
573 *
574 * @param source
575 * the source of the story
576 * @param in
577 * the input (the main resource)
578 *
579 * @return the uuid
580 *
581 * @throws IOException
582 * in case of I/O error
583 */
584 protected String getUuid(URL source, InputStream in) throws IOException {
585 return source.toString();
586 }
587
588 /**
589 * Return the story Library UID, a unique value representing the story (it
590 * is often a number) in the local library.
591 * <p>
592 * By default, this is empty.
593 *
594 * @param source
595 * the source of the story
596 * @param in
597 * the input (the main resource)
598 *
599 * @return the id
600 *
601 * @throws IOException
602 * in case of I/O error
603 */
604 protected String getLuid(URL source, InputStream in) throws IOException {
605 return "";
606 }
607
608 /**
609 * Return the 2-letter language code of this story.
610 * <p>
611 * By default, this is 'EN'.
612 *
613 * @param source
614 * the source of the story
615 * @param in
616 * the input (the main resource)
617 *
618 * @return the language
619 *
620 * @throws IOException
621 * in case of I/O error
622 */
623 protected String getLang(URL source, InputStream in) throws IOException {
624 return "EN";
625 }
626
627 /**
628 * Return the list of tags for this story.
629 *
630 * @param source
631 * the source of the story
632 * @param in
633 * the input (the main resource)
634 *
635 * @return the tags
636 *
637 * @throws IOException
638 * in case of I/O error
639 */
640 protected List<String> getTags(URL source, InputStream in)
641 throws IOException {
642 return new ArrayList<String>();
643 }
644
645 /**
646 * Return the first line from the given input which correspond to the given
647 * selectors.
648 * <p>
649 * Do not reset the input, which will be pointing at the line just after the
650 * result (input will be spent if no result is found).
651 *
652 * @param in
653 * the input
654 * @param needle
655 * a string that must be found inside the target line (also
656 * supports "^" at start to say "only if it starts with" the
657 * needle)
658 * @param relativeLine
659 * the line to return based upon the target line position (-1 =
660 * the line before, 0 = the target line...)
661 *
662 * @return the line
663 */
664 protected String getLine(InputStream in, String needle, int relativeLine) {
665 return getLine(in, needle, relativeLine, true);
666 }
667
668 /**
669 * Return a line from the given input which correspond to the given
670 * selectors.
671 * <p>
672 * Do not reset the input, which will be pointing at the line just after the
673 * result (input will be spent if no result is found) when first is TRUE,
674 * and will always be spent if first is FALSE.
675 *
676 * @param in
677 * the input
678 * @param needle
679 * a string that must be found inside the target line (also
680 * supports "^" at start to say "only if it starts with" the
681 * needle)
682 * @param relativeLine
683 * the line to return based upon the target line position (-1 =
684 * the line before, 0 = the target line...)
685 * @param first
686 * takes the first result (as opposed to the last one, which will
687 * also always spend the input)
688 *
689 * @return the line
690 */
691 protected String getLine(InputStream in, String needle, int relativeLine,
692 boolean first) {
693 String rep = null;
694
695 List<String> lines = new ArrayList<String>();
696 @SuppressWarnings("resource")
697 Scanner scan = new Scanner(in, "UTF-8");
698 int index = -1;
699 scan.useDelimiter("\\n");
700 while (scan.hasNext()) {
701 lines.add(scan.next());
702
703 if (index == -1) {
704 if (needle.startsWith("^")) {
705 if (lines.get(lines.size() - 1).startsWith(
706 needle.substring(1))) {
707 index = lines.size() - 1;
708 }
709
710 } else {
711 if (lines.get(lines.size() - 1).contains(needle)) {
712 index = lines.size() - 1;
713 }
714 }
715 }
716
717 if (index >= 0 && index + relativeLine < lines.size()) {
718 rep = lines.get(index + relativeLine);
719 if (first) {
720 break;
721 }
722 }
723 }
724
725 return rep;
726 }
727
728 /**
729 * Prepare the support if needed before processing.
 * <p>
 * It is called once the main resource is opened, before any information
 * is extracted from it; this default implementation does nothing.
730 *
 * @param in
 *            the input (the main resource)
 *
731 * @throws IOException
732 * on I/O error
733 */
734 protected void preprocess(InputStream in) throws IOException {
735 }
736
737 /**
738 * Now that we have processed the {@link Story}, close the resources if any.
 * <p>
 * This default implementation does nothing; an error thrown here is only
 * reported by the callers, the main resource is closed anyway.
739 *
740 * @throws IOException
741 * on I/O error
742 */
743 protected void close() throws IOException {
744 }
745
746 /**
747 * Create a {@link Chapter} object from the given information, formatting
748 * the content as it should be.
749 *
750 * @param number
751 * the chapter number
752 * @param name
753 * the chapter name
754 * @param content
755 * the chapter content
756 *
757 * @return the {@link Chapter}
758 *
759 * @throws IOException
760 * in case of I/O error
761 */
762 protected Chapter makeChapter(URL source, int number, String name,
763 String content) throws IOException {
764
765 // Chapter name: process it correctly, then remove the possible
766 // redundant "Chapter x: " in front of it
767 String chapterName = processPara(name).getContent().trim();
768 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
769 .split(",")) {
770 String chapterWord = Instance.getConfig().getStringX(
771 Config.CHAPTER, lang);
772 if (chapterName.startsWith(chapterWord)) {
773 chapterName = chapterName.substring(chapterWord.length())
774 .trim();
775 break;
776 }
777 }
778
779 if (chapterName.startsWith(Integer.toString(number))) {
780 chapterName = chapterName.substring(
781 Integer.toString(number).length()).trim();
782 }
783
784 if (chapterName.startsWith(":")) {
785 chapterName = chapterName.substring(1).trim();
786 }
787 //
788
789 Chapter chap = new Chapter(number, chapterName);
790
791 if (content == null) {
792 return chap;
793 }
794
795 if (isHtml()) {
796 // Special <HR> processing:
797 content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
798 "\n* * *\n");
799 }
800
801 InputStream in = new ByteArrayInputStream(content.getBytes("UTF-8"));
802 try {
803 @SuppressWarnings("resource")
804 Scanner scan = new Scanner(in, "UTF-8");
805 scan.useDelimiter("(\\n|</p>)"); // \n for test, </p> for html
806
807 List<Paragraph> paras = new ArrayList<Paragraph>();
808 while (scan.hasNext()) {
809 String line = scan.next().trim();
810 boolean image = false;
811 if (line.startsWith("[") && line.endsWith("]")) {
812 URL url = getImage(source,
813 line.substring(1, line.length() - 1).trim());
814 if (url != null) {
815 paras.add(new Paragraph(url));
816 image = true;
817 }
818 }
819
820 if (!image) {
821 paras.add(processPara(line));
822 }
823 }
824
825 // Check quotes for "bad" format
826 List<Paragraph> newParas = new ArrayList<Paragraph>();
827 for (Paragraph para : paras) {
828 newParas.addAll(requotify(para));
829 }
830 paras = newParas;
831
832 // Remove double blanks/brks
833 boolean space = false;
834 boolean brk = true;
835 for (int i = 0; i < paras.size(); i++) {
836 Paragraph para = paras.get(i);
837 boolean thisSpace = para.getType() == ParagraphType.BLANK;
838 boolean thisBrk = para.getType() == ParagraphType.BREAK;
839
840 if (space && thisBrk) {
841 paras.remove(i - 1);
842 i--;
843 } else if ((space || brk) && (thisSpace || thisBrk)) {
844 paras.remove(i);
845 i--;
846 }
847
848 space = thisSpace;
849 brk = thisBrk;
850 }
851
852 // Remove blank/brk at start
853 if (paras.size() > 0
854 && (paras.get(0).getType() == ParagraphType.BLANK || paras
855 .get(0).getType() == ParagraphType.BREAK)) {
856 paras.remove(0);
857 }
858
859 // Remove blank/brk at end
860 int last = paras.size() - 1;
861 if (paras.size() > 0
862 && (paras.get(last).getType() == ParagraphType.BLANK || paras
863 .get(last).getType() == ParagraphType.BREAK)) {
864 paras.remove(last);
865 }
866
867 chap.setParagraphs(paras);
868
869 return chap;
870 } finally {
871 in.close();
872 }
873 }
874
875 /**
876 * Return the list of supported image extensions.
877 *
878 * @return the extensions
879 */
880 protected String[] getImageExt(boolean emptyAllowed) {
881 if (emptyAllowed) {
882 return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
883 } else {
884 return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
885 }
886 }
887
888 /**
889 * Check if the given resource can be a local image or a remote image, then
890 * refresh the cache with it if it is.
891 *
892 * @param source
893 * the story source
894 * @param line
895 * the resource to check
896 *
897 * @return the image URL if found, or NULL
898 *
899 */
900 protected URL getImage(URL source, String line) {
901 String path = new File(source.getFile()).getParent();
902 URL url = null;
903
904 // try for files
905 try {
906 String urlBase = new File(new File(path), line.trim()).toURI()
907 .toURL().toString();
908 for (String ext : getImageExt(true)) {
909 if (new File(urlBase + ext).exists()) {
910 url = new File(urlBase + ext).toURI().toURL();
911 }
912 }
913 } catch (Exception e) {
914 // Nothing to do here
915 }
916
917 if (url == null) {
918 // try for URLs
919 try {
920 for (String ext : getImageExt(true)) {
921 if (Instance.getCache().check(new URL(line + ext))) {
922 url = new URL(line + ext);
923 }
924 }
925
926 // try out of cache
927 if (url == null) {
928 for (String ext : getImageExt(true)) {
929 try {
930 url = new URL(line + ext);
931 Instance.getCache().refresh(url, this, true);
932 break;
933 } catch (IOException e) {
934 // no image with this ext
935 url = null;
936 }
937 }
938 }
939 } catch (MalformedURLException e) {
940 // Not an url
941 }
942 }
943
944 // refresh the cached file
945 if (url != null) {
946 try {
947 Instance.getCache().refresh(url, this, true);
948 } catch (IOException e) {
949 // woops, broken image
950 url = null;
951 }
952 }
953
954 return url;
955 }
956
957 /**
958 * Reset then return {@link BasicSupport#in}.
959 *
960 * @return {@link BasicSupport#in}
961 *
962 * @throws IOException
963 * in case of I/O error
964 */
965 protected InputStream getInput() throws IOException {
966 in.reset();
967 return in;
968 }
969
970 /**
971 * Fix the author name if it is prefixed with some "by" {@link String}.
972 *
973 * @param author
974 * the author with a possible prefix
975 *
976 * @return the author without prefixes
977 */
978 private String fixAuthor(String author) {
979 if (author != null) {
980 for (String suffix : new String[] { " ", ":" }) {
981 for (String byString : Instance.getConfig()
982 .getString(Config.BYS).split(",")) {
983 byString += suffix;
984 if (author.toUpperCase().startsWith(byString.toUpperCase())) {
985 author = author.substring(byString.length()).trim();
986 }
987 }
988 }
989
990 // Special case (without suffix):
991 if (author.startsWith("©")) {
992 author = author.substring(1);
993 }
994 }
995
996 return author;
997 }
998
999 /**
1000 * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
1001 * and requotify them (i.e., separate them into QUOTE paragraphs and other
1002 * paragraphs (quotes or not)).
1003 *
1004 * @param para
1005 * the paragraph to requotify (not necessaraly a quote)
1006 *
1007 * @return the correctly (or so we hope) quotified paragraphs
1008 */
1009 private List<Paragraph> requotify(Paragraph para) {
1010 List<Paragraph> newParas = new ArrayList<Paragraph>();
1011
1012 if (para.getType() == ParagraphType.QUOTE) {
1013 String line = para.getContent();
1014 boolean singleQ = line.startsWith("" + openQuote);
1015 boolean doubleQ = line.startsWith("" + openDoubleQuote);
1016
1017 if (!singleQ && !doubleQ) {
1018 line = openDoubleQuote + line + closeDoubleQuote;
1019 newParas.add(new Paragraph(ParagraphType.QUOTE, line));
1020 } else {
1021 char close = singleQ ? closeQuote : closeDoubleQuote;
1022 int posClose = line.indexOf(close);
1023 int posDot = line.indexOf(".");
1024 while (posDot >= 0 && posDot < posClose) {
1025 posDot = line.indexOf(".", posDot + 1);
1026 }
1027
1028 if (posDot >= 0) {
1029 String rest = line.substring(posDot + 1).trim();
1030 line = line.substring(0, posDot + 1).trim();
1031 newParas.add(new Paragraph(ParagraphType.QUOTE, line));
1032 newParas.addAll(requotify(processPara(rest)));
1033 } else {
1034 newParas.add(para);
1035 }
1036 }
1037 } else {
1038 newParas.add(para);
1039 }
1040
1041 return newParas;
1042 }
1043
1044 /**
1045 * Process a {@link Paragraph} from a raw line of text.
1046 * <p>
1047 * Will also fix quotes and HTML encoding if needed.
1048 *
1049 * @param line
1050 * the raw line
1051 *
1052 * @return the processed {@link Paragraph}
1053 */
1054 private Paragraph processPara(String line) {
1055 line = ifUnhtml(line).trim();
1056
1057 boolean space = true;
1058 boolean brk = true;
1059 boolean quote = false;
1060 boolean tentativeCloseQuote = false;
1061 char prev = '\0';
1062 int dashCount = 0;
1063
1064 StringBuilder builder = new StringBuilder();
1065 for (char car : line.toCharArray()) {
1066 if (car != '-') {
1067 if (dashCount > 0) {
1068 // dash, ndash and mdash: - – —
1069 // currently: always use mdash
1070 builder.append(dashCount == 1 ? '-' : '—');
1071 }
1072 dashCount = 0;
1073 }
1074
1075 if (tentativeCloseQuote) {
1076 tentativeCloseQuote = false;
1077 if ((car >= 'a' && car <= 'z') || (car >= 'A' && car <= 'Z')
1078 || (car >= '0' && car <= '9')) {
1079 builder.append("'");
1080 } else {
1081 builder.append(closeQuote);
1082 }
1083 }
1084
1085 switch (car) {
1086 case ' ': // note: unbreakable space
1087 case ' ':
1088 case '\t':
1089 case '\n': // just in case
1090 case '\r': // just in case
1091 builder.append(' ');
1092 break;
1093
1094 case '\'':
1095 if (space || (brk && quote)) {
1096 quote = true;
1097 builder.append(openQuote);
1098 } else if (prev == ' ') {
1099 builder.append(openQuote);
1100 } else {
1101 // it is a quote ("I'm off") or a 'quote' ("This
1102 // 'good' restaurant"...)
1103 tentativeCloseQuote = true;
1104 }
1105 break;
1106
1107 case '"':
1108 if (space || (brk && quote)) {
1109 quote = true;
1110 builder.append(openDoubleQuote);
1111 } else if (prev == ' ') {
1112 builder.append(openDoubleQuote);
1113 } else {
1114 builder.append(closeDoubleQuote);
1115 }
1116 break;
1117
1118 case '-':
1119 if (space) {
1120 quote = true;
1121 } else {
1122 dashCount++;
1123 }
1124 space = false;
1125 break;
1126
1127 case '*':
1128 case '~':
1129 case '/':
1130 case '\\':
1131 case '<':
1132 case '>':
1133 case '=':
1134 case '+':
1135 case '_':
1136 case '–':
1137 case '—':
1138 space = false;
1139 builder.append(car);
1140 break;
1141
1142 case '‘':
1143 case '`':
1144 case '‹':
1145 case '﹁':
1146 case '〈':
1147 case '「':
1148 if (space || (brk && quote)) {
1149 quote = true;
1150 builder.append(openQuote);
1151 } else {
1152 builder.append(openQuote);
1153 }
1154 space = false;
1155 brk = false;
1156 break;
1157
1158 case '’':
1159 case '›':
1160 case '﹂':
1161 case '〉':
1162 case '」':
1163 space = false;
1164 brk = false;
1165 builder.append(closeQuote);
1166 break;
1167
1168 case '«':
1169 case '“':
1170 case '﹃':
1171 case '《':
1172 case '『':
1173 if (space || (brk && quote)) {
1174 quote = true;
1175 builder.append(openDoubleQuote);
1176 } else {
1177 builder.append(openDoubleQuote);
1178 }
1179 space = false;
1180 brk = false;
1181 break;
1182
1183 case '»':
1184 case '”':
1185 case '﹄':
1186 case '》':
1187 case '』':
1188 space = false;
1189 brk = false;
1190 builder.append(closeDoubleQuote);
1191 break;
1192
1193 default:
1194 space = false;
1195 brk = false;
1196 builder.append(car);
1197 break;
1198 }
1199
1200 prev = car;
1201 }
1202
1203 if (tentativeCloseQuote) {
1204 tentativeCloseQuote = false;
1205 builder.append(closeQuote);
1206 }
1207
1208 line = builder.toString().trim();
1209
1210 ParagraphType type = ParagraphType.NORMAL;
1211 if (space) {
1212 type = ParagraphType.BLANK;
1213 } else if (brk) {
1214 type = ParagraphType.BREAK;
1215 } else if (quote) {
1216 type = ParagraphType.QUOTE;
1217 }
1218
1219 return new Paragraph(type, line);
1220 }
1221
1222 /**
1223 * Remove the HTML from the inpit <b>if</b> {@link BasicSupport#isHtml()} is
1224 * true.
1225 *
1226 * @param input
1227 * the input
1228 *
1229 * @return the no html version if needed
1230 */
1231 private String ifUnhtml(String input) {
1232 if (isHtml() && input != null) {
1233 return StringUtils.unhtml(input);
1234 }
1235
1236 return input;
1237 }
1238
1239 /**
1240 * Return a {@link BasicSupport} implementation supporting the given
1241 * resource if possible.
1242 *
1243 * @param url
1244 * the story resource
1245 *
1246 * @return an implementation that supports it, or NULL
1247 */
1248 public static BasicSupport getSupport(URL url) {
1249 if (url == null) {
1250 return null;
1251 }
1252
1253 // TEXT and INFO_TEXT always support files (not URLs though)
1254 for (SupportType type : SupportType.values()) {
1255 if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1256 BasicSupport support = getSupport(type);
1257 if (support != null && support.supports(url)) {
1258 return support;
1259 }
1260 }
1261 }
1262
1263 for (SupportType type : new SupportType[] { SupportType.TEXT,
1264 SupportType.INFO_TEXT }) {
1265 BasicSupport support = getSupport(type);
1266 if (support != null && support.supports(url)) {
1267 return support;
1268 }
1269 }
1270
1271 return null;
1272 }
1273
1274 /**
1275 * Return a {@link BasicSupport} implementation supporting the given type.
1276 *
1277 * @param type
1278 * the type
1279 *
1280 * @return an implementation that supports it, or NULL
1281 */
1282 public static BasicSupport getSupport(SupportType type) {
1283 switch (type) {
1284 case EPUB:
1285 return new Epub().setType(type);
1286 case INFO_TEXT:
1287 return new InfoText().setType(type);
1288 case FIMFICTION:
1289 return new Fimfiction().setType(type);
1290 case FANFICTION:
1291 return new Fanfiction().setType(type);
1292 case TEXT:
1293 return new Text().setType(type);
1294 case MANGAFOX:
1295 return new MangaFox().setType(type);
1296 case E621:
1297 return new E621().setType(type);
1298 case CBZ:
1299 return new Cbz().setType(type);
1300 }
1301
1302 return null;
1303 }
1304}