Partially fix YiffStar support
[fanfix.git] / src / be / nikiroo / fanfix / supported / BasicSupport.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.awt.image.BufferedImage;
4 import java.io.BufferedReader;
5 import java.io.ByteArrayInputStream;
6 import java.io.File;
7 import java.io.IOException;
8 import java.io.InputStream;
9 import java.io.InputStreamReader;
10 import java.net.MalformedURLException;
11 import java.net.URL;
12 import java.util.ArrayList;
13 import java.util.HashMap;
14 import java.util.List;
15 import java.util.Map;
16 import java.util.Map.Entry;
17 import java.util.Scanner;
18
19 import be.nikiroo.fanfix.Instance;
20 import be.nikiroo.fanfix.bundles.Config;
21 import be.nikiroo.fanfix.bundles.StringId;
22 import be.nikiroo.fanfix.data.Chapter;
23 import be.nikiroo.fanfix.data.MetaData;
24 import be.nikiroo.fanfix.data.Paragraph;
25 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
26 import be.nikiroo.fanfix.data.Story;
27 import be.nikiroo.utils.IOUtils;
28 import be.nikiroo.utils.Progress;
29 import be.nikiroo.utils.StringUtils;
30
31 /**
32 * This class is the base class used by the other support classes. It can be
33 * used outside of this package, and has static methods that you can use to get
34 * access to the correct support class.
35 * <p>
36 * It will be used with 'resources' (usually web pages or files).
37 *
38 * @author niki
39 */
40 public abstract class BasicSupport {
41 /**
42 * The supported input types for which we can get a {@link BasicSupport}
43 * object.
44 *
45 * @author niki
46 */
47 public enum SupportType {
48 /** EPUB files created with this program */
49 EPUB,
50 /** Pure text file with some rules */
51 TEXT,
52 /** TEXT but with associated .info file */
53 INFO_TEXT,
54 /** My Little Pony fanfictions */
55 FIMFICTION,
56 /** Fanfictions from a lot of different universes */
57 FANFICTION,
58 /** Website with lots of Mangas */
59 MANGAFOX,
60 /** Furry website with comics support */
61 E621,
62 /** Furry website with stories */
63 YIFFSTAR,
64 /** CBZ files */
65 CBZ,
66 /** HTML files */
67 HTML;
68
69 /**
70 * A description of this support type (more information than the
71 * {@link BasicSupport#getSourceName()}).
72 *
73 * @return the description
74 */
75 public String getDesc() {
76 String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
77 this.name());
78
79 if (desc == null) {
80 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
81 }
82
83 return desc;
84 }
85
86 /**
87 * The name of this support type (a short version).
88 *
89 * @return the name
90 */
91 public String getSourceName() {
92 BasicSupport support = BasicSupport.getSupport(this);
93 if (support != null) {
94 return support.getSourceName();
95 }
96
97 return null;
98 }
99
100 @Override
101 public String toString() {
102 return super.toString().toLowerCase();
103 }
104
105 /**
106 * Call {@link SupportType#valueOf(String.toUpperCase())}.
107 *
108 * @param typeName
109 * the possible type name
110 *
111 * @return NULL or the type
112 */
113 public static SupportType valueOfUC(String typeName) {
114 return SupportType.valueOf(typeName == null ? null : typeName
115 .toUpperCase());
116 }
117
118 /**
119 * Call {@link SupportType#valueOf(String.toUpperCase())} but return
120 * NULL for NULL instead of raising exception.
121 *
122 * @param typeName
123 * the possible type name
124 *
125 * @return NULL or the type
126 */
127 public static SupportType valueOfNullOkUC(String typeName) {
128 if (typeName == null) {
129 return null;
130 }
131
132 return SupportType.valueOfUC(typeName);
133 }
134
135 /**
136 * Call {@link SupportType#valueOf(String.toUpperCase())} but return
137 * NULL in case of error instead of raising an exception.
138 *
139 * @param typeName
140 * the possible type name
141 *
142 * @return NULL or the type
143 */
144 public static SupportType valueOfAllOkUC(String typeName) {
145 try {
146 return SupportType.valueOfUC(typeName);
147 } catch (Exception e) {
148 return null;
149 }
150 }
151 }
152
// Main resource of the story: assigned by processMeta(), handed out
// (after a reset) by getInput(), closed by processMeta()/process().
153 private InputStream in;
// Set by setType(); getSupport(SupportType) tags each new instance with it.
154 private SupportType type;
155 private URL currentReferer; // with only one 'r', as in 'HTTP'...
156
157 // quote chars: read once from the translations, used by processPara() and requotify() as the normalised quote characters
158 private char openQuote = Instance.getTrans().getChar(
159 StringId.OPEN_SINGLE_QUOTE);
160 private char closeQuote = Instance.getTrans().getChar(
161 StringId.CLOSE_SINGLE_QUOTE);
162 private char openDoubleQuote = Instance.getTrans().getChar(
163 StringId.OPEN_DOUBLE_QUOTE);
164 private char closeDoubleQuote = Instance.getTrans().getChar(
165 StringId.CLOSE_DOUBLE_QUOTE);
166
167 /**
168 * The name of this support class.
 * <p>
 * This is the value returned by {@link SupportType#getSourceName()} for
 * the matching type.
169 *
170 * @return the name
171 */
172 protected abstract String getSourceName();
173
174 /**
175 * Check if the given resource is supported by this {@link BasicSupport}.
 * <p>
 * {@link BasicSupport#getSupport(URL)} asks each candidate implementation
 * in turn and returns the first one that answers TRUE.
176 *
177 * @param url
178 * the resource to check for
179 *
180 * @return TRUE if it is
181 */
182 protected abstract boolean supports(URL url);
183
184 /**
185 * Return TRUE if the support will return HTML encoded content values for
186 * the chapters content.
 * <p>
 * When TRUE, the paragraph builder converts HR tags into "* * *" lines,
 * splits the content on P and BR tags, and the text of each paragraph is
 * passed through {@link StringUtils#unhtml(String)}.
187 *
188 * @return TRUE for HTML
189 */
190 protected abstract boolean isHtml();
191
/**
 * Return the metadata of the story.
 * <p>
 * It is called by the metadata processing step once the main input has
 * been opened and preprocessed; the result is stored as-is in the
 * {@link Story}. A cover-less result gets the default cover of its subject.
 *
 * @param source
 *            the source of the story
 * @param in
 *            the input (the main resource)
 *
 * @return the metadata
 *
 * @throws IOException
 *             in case of I/O error
 */
192 protected abstract MetaData getMeta(URL source, InputStream in)
193 throws IOException;
194
195 /**
196 * Return the story description.
 * <p>
 * It is only requested when a full {@link Story} is processed; the text
 * is then converted into chapter number 0 and stored as the resume of the
 * metadata.
197 *
198 * @param source
199 * the source of the story
200 * @param in
201 * the input (the main resource)
202 *
203 * @return the description
204 *
205 * @throws IOException
206 * in case of I/O error
207 */
208 protected abstract String getDesc(URL source, InputStream in)
209 throws IOException;
210
211 /**
212 * Return the list of chapters (name and resource).
 * <p>
 * Each entry maps the chapter name to the {@link URL} its content is read
 * from. A NULL result is accepted: the {@link Story} is then returned
 * with an empty list of chapters.
213 *
214 * @param source
215 * the source of the story
216 * @param in
217 * the input (the main resource)
218 *
219 * @return the chapters
220 *
221 * @throws IOException
222 * in case of I/O error
223 */
224 protected abstract List<Entry<String, URL>> getChapters(URL source,
225 InputStream in) throws IOException;
226
227 /**
228 * Return the content of the chapter (possibly HTML encoded, if
229 * {@link BasicSupport#isHtml()} is TRUE).
230 *
231 * @param source
232 * the source of the story
233 * @param in
234 * the input; {@link BasicSupport#process(URL, Progress)} passes the stream opened on the {@link URL} of the chapter itself, not the main resource
235 * @param number
236 * the chapter number ({@link BasicSupport#process(URL, Progress)} starts counting at 1)
237 *
238 * @return the content
239 *
240 * @throws IOException
241 * in case of I/O error
242 */
243 protected abstract String getChapterContent(URL source, InputStream in,
244 int number) throws IOException;
245
246 /**
247 * Log into the support (can be a no-op depending upon the support).
 * <p>
 * It is the first thing called when a resource is processed; this default
 * implementation does nothing.
248 *
249 * @throws IOException
250 * in case of I/O error
251 */
252 public void login() throws IOException {
// nothing to do by default
253
254 }
255
256 /**
257 * Return the list of cookies (values included) that must be used to
258 * correctly fetch the resources.
259 * <p>
260 * You are expected to call the super method implementation if you override
261 * it.
262 *
263 * @return the cookies
264 *
265 * @throws IOException
266 * in case of I/O error
267 */
268 public Map<String, String> getCookies() throws IOException {
269 return new HashMap<String, String>();
270 }
271
272 /**
273 * Return the canonical form of the main {@link URL}.
 * <p>
 * This default implementation returns the given {@link URL} unchanged.
274 *
275 * @param source
276 * the source {@link URL}
277 *
278 * @return the canonical form of this {@link URL}
279 *
280 * @throws IOException
281 * in case of I/O error
282 */
283 public URL getCanonicalUrl(URL source) throws IOException {
284 return source;
285 }
286
287 /**
288 * Process the given story resource into a partially filled {@link Story}
289 * object containing the name and metadata, except for the description.
 * <p>
 * The resources are closed before this method returns.
290 *
291 * @param url
292 * the story resource
293 *
294 * @return the {@link Story}, or NULL if the resource could not be opened
295 *
296 * @throws IOException
297 * in case of I/O error
298 */
299 public Story processMeta(URL url) throws IOException {
// close = true (release everything when done), getDesc = false (no description)
300 return processMeta(url, true, false);
301 }
302
303 /**
304 * Process the given story resource into a partially filled {@link Story}
305 * object containing the name and metadata.
306 *
307 * @param url
308 * the story resource
309 *
310 * @param close
311 * close "this" and "in" when done
312 *
313 * @return the {@link Story}
314 *
315 * @throws IOException
316 * in case of I/O error
317 */
318 protected Story processMeta(URL url, boolean close, boolean getDesc)
319 throws IOException {
320 login();
321
322 url = getCanonicalUrl(url);
323
324 setCurrentReferer(url);
325
326 in = openInput(url);
327 if (in == null) {
328 return null;
329 }
330
331 try {
332 preprocess(url, getInput());
333
334 Story story = new Story();
335 MetaData meta = getMeta(url, getInput());
336 story.setMeta(meta);
337
338 if (meta != null && meta.getCover() == null) {
339 meta.setCover(getDefaultCover(meta.getSubject()));
340 }
341
342 if (getDesc) {
343 String descChapterName = Instance.getTrans().getString(
344 StringId.DESCRIPTION);
345 story.getMeta().setResume(
346 makeChapter(url, 0, descChapterName,
347 getDesc(url, getInput())));
348 }
349
350 return story;
351 } finally {
352 if (close) {
353 try {
354 close();
355 } catch (IOException e) {
356 Instance.syserr(e);
357 }
358
359 if (in != null) {
360 in.close();
361 }
362 }
363
364 setCurrentReferer(null);
365 }
366 }
367
368 /**
369 * Process the given story resource into a fully filled {@link Story}
370 * object.
371 *
372 * @param url
373 * the story resource
374 * @param pg
375 * the optional progress reporter
376 *
377 * @return the {@link Story}
378 *
379 * @throws IOException
380 * in case of I/O error
381 */
382 public Story process(URL url, Progress pg) throws IOException {
383 if (pg == null) {
384 pg = new Progress();
385 } else {
386 pg.setMinMax(0, 100);
387 }
388
389 url = getCanonicalUrl(url);
390 pg.setProgress(1);
391 try {
392 Story story = processMeta(url, false, true);
393 pg.setProgress(10);
394 if (story == null) {
395 pg.setProgress(100);
396 return null;
397 }
398
399 setCurrentReferer(url);
400
401 story.setChapters(new ArrayList<Chapter>());
402
403 List<Entry<String, URL>> chapters = getChapters(url, getInput());
404 pg.setProgress(20);
405
406 int i = 1;
407 if (chapters != null) {
408 Progress pgChaps = new Progress(0, chapters.size());
409 pg.addProgress(pgChaps, 80);
410
411 for (Entry<String, URL> chap : chapters) {
412 setCurrentReferer(chap.getValue());
413 InputStream chapIn = Instance.getCache().open(
414 chap.getValue(), this, true);
415 try {
416 story.getChapters().add(
417 makeChapter(url, i, chap.getKey(),
418 getChapterContent(url, chapIn, i)));
419 } finally {
420 chapIn.close();
421 }
422
423 pgChaps.setProgress(i++);
424 }
425 } else {
426 pg.setProgress(100);
427 }
428
429 return story;
430
431 } finally {
432 try {
433 close();
434 } catch (IOException e) {
435 Instance.syserr(e);
436 }
437
438 if (in != null) {
439 in.close();
440 }
441
442 setCurrentReferer(null);
443 }
444 }
445
446 /**
447 * The support type.
 * <p>
 * It is NULL until {@link BasicSupport#setType(SupportType)} has been
 * called.
448 *
449 * @return the type
450 */
451 public SupportType getType() {
452 return type;
453 }
454
455 /**
456 * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
457 * the current {@link URL} we work on.
 * <p>
 * It is NULL when no resource is being processed.
458 *
459 * @return the referer
460 */
461 public URL getCurrentReferer() {
462 return currentReferer;
463 }
464
465 /**
466 * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
467 * the current {@link URL} we work on.
468 *
469 * @param currentReferer
470 * the new referer (NULL once the processing is done)
471 */
472 protected void setCurrentReferer(URL currentReferer) {
473 this.currentReferer = currentReferer;
474 }
475
476 /**
477 * The support type.
 * <p>
 * {@link BasicSupport#getSupport(SupportType)} calls it on each instance
 * it creates.
478 *
479 * @param type
480 * the new type
481 *
482 * @return this
483 */
484 protected BasicSupport setType(SupportType type) {
485 this.type = type;
486 return this;
487 }
488
489 /**
490 * Prepare the support if needed before processing.
 * <p>
 * It is called right after the main input has been opened and before the
 * metadata is read; this default implementation does nothing.
491 *
492 * @param source
493 * the source of the story
494 * @param in
495 * the input (the main resource)
496 *
497 * @throws IOException
498 * on I/O error
499 */
500 protected void preprocess(URL source, InputStream in) throws IOException {
501 }
502
503 /**
504 * Now that we have processed the {@link Story}, close the resources if any.
 * <p>
 * It is called before the main input itself is closed; an
 * {@link IOException} thrown from here is logged by the caller and not
 * propagated. This default implementation does nothing.
505 *
506 * @throws IOException
507 * on I/O error
508 */
509 protected void close() throws IOException {
510 }
511
512 /**
513 * Create a {@link Chapter} object from the given information, formatting
514 * the content as it should be.
515 *
516 * @param number
517 * the chapter number
518 * @param name
519 * the chapter name
520 * @param content
521 * the chapter content
522 *
523 * @return the {@link Chapter}
524 *
525 * @throws IOException
526 * in case of I/O error
527 */
528 protected Chapter makeChapter(URL source, int number, String name,
529 String content) throws IOException {
530 // Chapter name: process it correctly, then remove the possible
531 // redundant "Chapter x: " in front of it
532 String chapterName = processPara(name).getContent().trim();
533 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
534 .split(",")) {
535 String chapterWord = Instance.getConfig().getStringX(
536 Config.CHAPTER, lang);
537 if (chapterName.startsWith(chapterWord)) {
538 chapterName = chapterName.substring(chapterWord.length())
539 .trim();
540 break;
541 }
542 }
543
544 if (chapterName.startsWith(Integer.toString(number))) {
545 chapterName = chapterName.substring(
546 Integer.toString(number).length()).trim();
547 }
548
549 if (chapterName.startsWith(":")) {
550 chapterName = chapterName.substring(1).trim();
551 }
552 //
553
554 Chapter chap = new Chapter(number, chapterName);
555
556 if (content != null) {
557 chap.setParagraphs(makeParagraphs(source, content));
558 }
559
560 return chap;
561
562 }
563
564 /**
565 * Convert the given content into {@link Paragraph}s.
566 *
567 * @param source
568 * the source URL of the story
569 * @param content
570 * the textual content
571 *
572 * @return the {@link Paragraph}s
573 *
574 * @throws IOException
575 * in case of I/O error
576 */
577 protected List<Paragraph> makeParagraphs(URL source, String content)
578 throws IOException {
579 if (isHtml()) {
580 // Special <HR> processing:
581 content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
582 "\n* * *\n");
583 }
584
585 List<Paragraph> paras = new ArrayList<Paragraph>();
586 InputStream in = new ByteArrayInputStream(content.getBytes("UTF-8"));
587 try {
588 BufferedReader buff = new BufferedReader(new InputStreamReader(in,
589 "UTF-8"));
590
591 for (String encodedLine = buff.readLine(); encodedLine != null; encodedLine = buff
592 .readLine()) {
593 String lines[];
594 if (isHtml()) {
595 lines = encodedLine.split("(<p>|</p>|<br>|<br/>|\\n)");
596 } else {
597 lines = new String[] { encodedLine };
598 }
599
600 for (String aline : lines) {
601 String line = aline.trim();
602
603 URL image = null;
604 if (line.startsWith("[") && line.endsWith("]")) {
605 image = getImageUrl(this, source,
606 line.substring(1, line.length() - 1).trim());
607 }
608
609 if (image != null) {
610 paras.add(new Paragraph(image));
611 } else {
612 paras.add(processPara(line));
613 }
614 }
615 }
616 } finally {
617 in.close();
618 }
619
620 // Check quotes for "bad" format
621 List<Paragraph> newParas = new ArrayList<Paragraph>();
622 for (Paragraph para : paras) {
623 newParas.addAll(requotify(para));
624 }
625 paras = newParas;
626
627 // Remove double blanks/brks
628 fixBlanksBreaks(paras);
629
630 return paras;
631 }
632
633 /**
634 * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
635 * those {@link Paragraph}s.
636 * <p>
637 * The resulting list will not contain a starting or trailing blank/break
638 * nor 2 blanks or breaks following each other.
639 *
640 * @param paras
641 * the list of {@link Paragraph}s to fix
642 */
643 protected void fixBlanksBreaks(List<Paragraph> paras) {
644 boolean space = false;
645 boolean brk = true;
646 for (int i = 0; i < paras.size(); i++) {
647 Paragraph para = paras.get(i);
648 boolean thisSpace = para.getType() == ParagraphType.BLANK;
649 boolean thisBrk = para.getType() == ParagraphType.BREAK;
650
651 if (i > 0 && space && thisBrk) {
652 paras.remove(i - 1);
653 i--;
654 } else if ((space || brk) && (thisSpace || thisBrk)) {
655 paras.remove(i);
656 i--;
657 }
658
659 space = thisSpace;
660 brk = thisBrk;
661 }
662
663 // Remove blank/brk at start
664 if (paras.size() > 0
665 && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
666 0).getType() == ParagraphType.BREAK)) {
667 paras.remove(0);
668 }
669
670 // Remove blank/brk at end
671 int last = paras.size() - 1;
672 if (paras.size() > 0
673 && (paras.get(last).getType() == ParagraphType.BLANK || paras
674 .get(last).getType() == ParagraphType.BREAK)) {
675 paras.remove(last);
676 }
677 }
678
679 /**
680 * Get the default cover related to this subject (see <tt>.info</tt> files).
681 *
682 * @param subject
683 * the subject
684 *
685 * @return the cover if any, or NULL
686 */
687 static BufferedImage getDefaultCover(String subject) {
688 if (subject != null && !subject.isEmpty()
689 && Instance.getCoverDir() != null) {
690 try {
691 File fileCover = new File(Instance.getCoverDir(), subject);
692 return getImage(null, fileCover.toURI().toURL(), subject);
693 } catch (MalformedURLException e) {
694 }
695 }
696
697 return null;
698 }
699
700 /**
701 * Return the list of supported image extensions.
702 *
703 * @param emptyAllowed
704 * TRUE to allow an empty extension on first place, which can be
705 * used when you may already have an extension in your input but
706 * are not sure about it
707 *
708 * @return the extensions
709 */
710 static String[] getImageExt(boolean emptyAllowed) {
711 if (emptyAllowed) {
712 return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
713 } else {
714 return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
715 }
716 }
717
718 /**
719 * Check if the given resource can be a local image or a remote image, then
720 * refresh the cache with it if it is.
721 *
722 * @param source
723 * the story source
724 * @param line
725 * the resource to check
726 *
727 * @return the image if found, or NULL
728 *
729 */
730 static BufferedImage getImage(BasicSupport support, URL source, String line) {
731 URL url = getImageUrl(support, source, line);
732 if (url != null) {
733 InputStream in = null;
734 try {
735 in = Instance.getCache().open(url, getSupport(url), true);
736 return IOUtils.toImage(in);
737 } catch (IOException e) {
738 } finally {
739 if (in != null) {
740 try {
741 in.close();
742 } catch (IOException e) {
743 }
744 }
745 }
746 }
747
748 return null;
749 }
750
751 /**
752 * Check if the given resource can be a local image or a remote image, then
753 * refresh the cache with it if it is.
754 *
755 * @param source
756 * the story source
757 * @param line
758 * the resource to check
759 *
760 * @return the image URL if found, or NULL
761 *
762 */
763 static URL getImageUrl(BasicSupport support, URL source, String line) {
764 URL url = null;
765
766 if (line != null) {
767 // try for files
768 String path = null;
769 if (source != null) {
770 path = new File(source.getFile()).getParent();
771 try {
772 String basePath = new File(new File(path), line.trim())
773 .getAbsolutePath();
774 for (String ext : getImageExt(true)) {
775 if (new File(basePath + ext).exists()) {
776 url = new File(basePath + ext).toURI().toURL();
777 }
778 }
779 } catch (Exception e) {
780 // Nothing to do here
781 }
782 }
783
784 if (url == null) {
785 // try for URLs
786 try {
787 for (String ext : getImageExt(true)) {
788 if (Instance.getCache().check(new URL(line + ext))) {
789 url = new URL(line + ext);
790 break;
791 }
792 }
793
794 // try out of cache
795 if (url == null) {
796 for (String ext : getImageExt(true)) {
797 try {
798 url = new URL(line + ext);
799 Instance.getCache().refresh(url, support, true);
800 break;
801 } catch (IOException e) {
802 // no image with this ext
803 url = null;
804 }
805 }
806 }
807 } catch (MalformedURLException e) {
808 // Not an url
809 }
810 }
811
812 // refresh the cached file
813 if (url != null) {
814 try {
815 Instance.getCache().refresh(url, support, true);
816 } catch (IOException e) {
817 // woops, broken image
818 url = null;
819 }
820 }
821 }
822
823 return url;
824 }
825
826 /**
827 * Open the input file that will be used through the support.
 * <p>
 * This default implementation opens the source through the cache.
828 *
829 * @param source
830 * the source {@link URL}
831 *
832 * @return the {@link InputStream} (callers treat NULL as "resource not available")
833 *
834 * @throws IOException
835 * in case of I/O error
836 */
837 protected InputStream openInput(URL source) throws IOException {
// NOTE(review): the last argument is false here but true for chapters and images -- confirm its meaning against the cache API
838 return Instance.getCache().open(source, this, false);
839 }
840
841 /**
842 * Reset the given {@link InputStream} and return it.
843 *
844 * @param in
845 * the {@link InputStream} to reset
846 *
847 * @return the same {@link InputStream} after reset
848 */
849 protected InputStream reset(InputStream in) {
850 try {
851 in.reset();
852 } catch (IOException e) {
853 }
854 return in;
855 }
856
857 /**
858 * Reset then return {@link BasicSupport#in}.
 * <p>
 * Each processing step gets the main input through this method.
859 *
860 * @return {@link BasicSupport#in}
861 */
862 protected InputStream getInput() {
863 return reset(in);
864 }
865
866 /**
867 * Fix the author name if it is prefixed with some "by" {@link String}.
868 *
869 * @param author
870 * the author with a possible prefix
871 *
872 * @return the author without prefixes
873 */
874 protected String fixAuthor(String author) {
875 if (author != null) {
876 for (String suffix : new String[] { " ", ":" }) {
877 for (String byString : Instance.getConfig()
878 .getString(Config.BYS).split(",")) {
879 byString += suffix;
880 if (author.toUpperCase().startsWith(byString.toUpperCase())) {
881 author = author.substring(byString.length()).trim();
882 }
883 }
884 }
885
886 // Special case (without suffix):
887 if (author.startsWith("©")) {
888 author = author.substring(1);
889 }
890 }
891
892 return author;
893 }
894
895 /**
896 * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
897 * and requotify them (i.e., separate them into QUOTE paragraphs and other
898 * paragraphs (quotes or not)).
899 *
900 * @param para
901 * the paragraph to requotify (not necessarily a quote)
902 *
903 * @return the correctly (or so we hope) quotified paragraphs
904 */
905 protected List<Paragraph> requotify(Paragraph para) {
906 List<Paragraph> newParas = new ArrayList<Paragraph>();
907
908 if (para.getType() == ParagraphType.QUOTE
909 && para.getContent().length() > 2) {
910 String line = para.getContent();
911 boolean singleQ = line.startsWith("" + openQuote);
912 boolean doubleQ = line.startsWith("" + openDoubleQuote);
913
914 // Do not try when more than one quote at a time
915 // (some stories are not easily readable if we do)
916 if (singleQ
917 && line.indexOf(closeQuote, 1) < line
918 .lastIndexOf(closeQuote)) {
919 newParas.add(para);
920 return newParas;
921 }
922 if (doubleQ
923 && line.indexOf(closeDoubleQuote, 1) < line
924 .lastIndexOf(closeDoubleQuote)) {
925 newParas.add(para);
926 return newParas;
927 }
928 //
929
930 if (!singleQ && !doubleQ) {
931 line = openDoubleQuote + line + closeDoubleQuote;
932 newParas.add(new Paragraph(ParagraphType.QUOTE, line));
933 } else {
934 char open = singleQ ? openQuote : openDoubleQuote;
935 char close = singleQ ? closeQuote : closeDoubleQuote;
936
937 int posDot = -1;
938 boolean inQuote = false;
939 int i = 0;
940 for (char car : line.toCharArray()) {
941 if (car == open) {
942 inQuote = true;
943 } else if (car == close) {
944 inQuote = false;
945 } else if (car == '.' && !inQuote) {
946 posDot = i;
947 break;
948 }
949 i++;
950 }
951
952 if (posDot >= 0) {
953 String rest = line.substring(posDot + 1).trim();
954 line = line.substring(0, posDot + 1).trim();
955 newParas.add(new Paragraph(ParagraphType.QUOTE, line));
956 if (!rest.isEmpty()) {
957 newParas.addAll(requotify(processPara(rest)));
958 }
959 } else {
960 newParas.add(para);
961 }
962 }
963 } else {
964 newParas.add(para);
965 }
966
967 return newParas;
968 }
969
970 /**
971 * Process a {@link Paragraph} from a raw line of text.
972 * <p>
973 * Will also fix quotes and HTML encoding if needed.
974 *
975 * @param line
976 * the raw line
977 *
978 * @return the processed {@link Paragraph}
979 */
980 protected Paragraph processPara(String line) {
981 line = ifUnhtml(line).trim();
982
983 boolean space = true;
984 boolean brk = true;
985 boolean quote = false;
986 boolean tentativeCloseQuote = false;
987 char prev = '\0';
988 int dashCount = 0;
989
990 StringBuilder builder = new StringBuilder();
991 for (char car : line.toCharArray()) {
992 if (car != '-') {
993 if (dashCount > 0) {
994 // dash, ndash and mdash: - – —
995 // currently: always use mdash
996 builder.append(dashCount == 1 ? '-' : '—');
997 }
998 dashCount = 0;
999 }
1000
1001 if (tentativeCloseQuote) {
1002 tentativeCloseQuote = false;
1003 if (Character.isLetterOrDigit(car)) {
1004 builder.append("'");
1005 } else {
1006 // handle double-single quotes as double quotes
1007 if (prev == car) {
1008 builder.append(closeDoubleQuote);
1009 continue;
1010 } else {
1011 builder.append(closeQuote);
1012 }
1013 }
1014 }
1015
1016 switch (car) {
1017 case ' ': // note: unbreakable space
1018 case ' ':
1019 case '\t':
1020 case '\n': // just in case
1021 case '\r': // just in case
1022 builder.append(' ');
1023 break;
1024
1025 case '\'':
1026 if (space || (brk && quote)) {
1027 quote = true;
1028 // handle double-single quotes as double quotes
1029 if (prev == car) {
1030 builder.deleteCharAt(builder.length() - 1);
1031 builder.append(openDoubleQuote);
1032 } else {
1033 builder.append(openQuote);
1034 }
1035 } else if (prev == ' ' || prev == car) {
1036 // handle double-single quotes as double quotes
1037 if (prev == car) {
1038 builder.deleteCharAt(builder.length() - 1);
1039 builder.append(openDoubleQuote);
1040 } else {
1041 builder.append(openQuote);
1042 }
1043 } else {
1044 // it is a quote ("I'm off") or a 'quote' ("This
1045 // 'good' restaurant"...)
1046 tentativeCloseQuote = true;
1047 }
1048 break;
1049
1050 case '"':
1051 if (space || (brk && quote)) {
1052 quote = true;
1053 builder.append(openDoubleQuote);
1054 } else if (prev == ' ') {
1055 builder.append(openDoubleQuote);
1056 } else {
1057 builder.append(closeDoubleQuote);
1058 }
1059 break;
1060
1061 case '-':
1062 if (space) {
1063 quote = true;
1064 } else {
1065 dashCount++;
1066 }
1067 space = false;
1068 break;
1069
1070 case '*':
1071 case '~':
1072 case '/':
1073 case '\\':
1074 case '<':
1075 case '>':
1076 case '=':
1077 case '+':
1078 case '_':
1079 case '–':
1080 case '—':
1081 space = false;
1082 builder.append(car);
1083 break;
1084
1085 case '‘':
1086 case '`':
1087 case '‹':
1088 case '﹁':
1089 case '〈':
1090 case '「':
1091 if (space || (brk && quote)) {
1092 quote = true;
1093 builder.append(openQuote);
1094 } else {
1095 // handle double-single quotes as double quotes
1096 if (prev == car) {
1097 builder.deleteCharAt(builder.length() - 1);
1098 builder.append(openDoubleQuote);
1099 } else {
1100 builder.append(openQuote);
1101 }
1102 }
1103 space = false;
1104 brk = false;
1105 break;
1106
1107 case '’':
1108 case '›':
1109 case '﹂':
1110 case '〉':
1111 case '」':
1112 space = false;
1113 brk = false;
1114 // handle double-single quotes as double quotes
1115 if (prev == car) {
1116 builder.deleteCharAt(builder.length() - 1);
1117 builder.append(closeDoubleQuote);
1118 } else {
1119 builder.append(closeQuote);
1120 }
1121 break;
1122
1123 case '«':
1124 case '“':
1125 case '﹃':
1126 case '《':
1127 case '『':
1128 if (space || (brk && quote)) {
1129 quote = true;
1130 builder.append(openDoubleQuote);
1131 } else {
1132 builder.append(openDoubleQuote);
1133 }
1134 space = false;
1135 brk = false;
1136 break;
1137
1138 case '»':
1139 case '”':
1140 case '﹄':
1141 case '》':
1142 case '』':
1143 space = false;
1144 brk = false;
1145 builder.append(closeDoubleQuote);
1146 break;
1147
1148 default:
1149 space = false;
1150 brk = false;
1151 builder.append(car);
1152 break;
1153 }
1154
1155 prev = car;
1156 }
1157
1158 if (tentativeCloseQuote) {
1159 tentativeCloseQuote = false;
1160 builder.append(closeQuote);
1161 }
1162
1163 line = builder.toString().trim();
1164
1165 ParagraphType type = ParagraphType.NORMAL;
1166 if (space) {
1167 type = ParagraphType.BLANK;
1168 } else if (brk) {
1169 type = ParagraphType.BREAK;
1170 } else if (quote) {
1171 type = ParagraphType.QUOTE;
1172 }
1173
1174 return new Paragraph(type, line);
1175 }
1176
1177 /**
1178 * Remove the HTML from the input <b>if</b> {@link BasicSupport#isHtml()} is
1179 * true.
1180 *
1181 * @param input
1182 * the input
1183 *
1184 * @return the no html version if needed
1185 */
1186 private String ifUnhtml(String input) {
1187 if (isHtml() && input != null) {
1188 return StringUtils.unhtml(input);
1189 }
1190
1191 return input;
1192 }
1193
1194 /**
1195 * Return a {@link BasicSupport} implementation supporting the given
1196 * resource if possible.
1197 *
1198 * @param url
1199 * the story resource
1200 *
1201 * @return an implementation that supports it, or NULL
1202 */
1203 public static BasicSupport getSupport(URL url) {
1204 if (url == null) {
1205 return null;
1206 }
1207
1208 // TEXT and INFO_TEXT always support files (not URLs though)
1209 for (SupportType type : SupportType.values()) {
1210 if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1211 BasicSupport support = getSupport(type);
1212 if (support != null && support.supports(url)) {
1213 return support;
1214 }
1215 }
1216 }
1217
1218 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
1219 SupportType.TEXT }) {
1220 BasicSupport support = getSupport(type);
1221 if (support != null && support.supports(url)) {
1222 return support;
1223 }
1224 }
1225
1226 return null;
1227 }
1228
1229 /**
1230 * Return a {@link BasicSupport} implementation supporting the given type.
1231 *
1232 * @param type
1233 * the type
1234 *
1235 * @return an implementation that supports it, or NULL
1236 */
1237 public static BasicSupport getSupport(SupportType type) {
1238 switch (type) {
1239 case EPUB:
1240 return new Epub().setType(type);
1241 case INFO_TEXT:
1242 return new InfoText().setType(type);
1243 case FIMFICTION:
1244 return new Fimfiction().setType(type);
1245 case FANFICTION:
1246 return new Fanfiction().setType(type);
1247 case TEXT:
1248 return new Text().setType(type);
1249 case MANGAFOX:
1250 return new MangaFox().setType(type);
1251 case E621:
1252 return new E621().setType(type);
1253 case YIFFSTAR:
1254 return new YiffStar().setType(type);
1255 case CBZ:
1256 return new Cbz().setType(type);
1257 case HTML:
1258 return new Html().setType(type);
1259 }
1260
1261 return null;
1262 }
1263
1264 /**
1265 * Return the first line from the given input which correspond to the given
1266 * selectors.
 * <p>
 * The input is reset before it is read, and only read up to the line to
 * return.
1267 *
1268 * @param in
1269 * the input
1270 * @param needle
1271 * a string that must be found inside the target line (also
1272 * supports "^" at start to say "only if it starts with" the
1273 * needle)
1274 * @param relativeLine
1275 * the line to return based upon the target line position (-1 =
1276 * the line before, 0 = the target line...)
1277 *
1278 * @return the line, or NULL if not found
1279 */
1280 static String getLine(InputStream in, String needle, int relativeLine) {
1281 return getLine(in, needle, relativeLine, true);
1282 }
1283
1284 /**
1285 * Return a line from the given input which correspond to the given
1286 * selectors.
1287 *
1288 * @param in
1289 * the input
1290 * @param needle
1291 * a string that must be found inside the target line (also
1292 * supports "^" at start to say "only if it starts with" the
1293 * needle)
1294 * @param relativeLine
1295 * the line to return based upon the target line position (-1 =
1296 * the line before, 0 = the target line...)
1297 * @param first
1298 * takes the first result (as opposed to the last one, which will
1299 * also always spend the input)
1300 *
1301 * @return the line
1302 */
1303 static String getLine(InputStream in, String needle, int relativeLine,
1304 boolean first) {
1305 String rep = null;
1306
1307 try {
1308 in.reset();
1309 } catch (IOException e) {
1310 Instance.syserr(e);
1311 }
1312
1313 List<String> lines = new ArrayList<String>();
1314 @SuppressWarnings("resource")
1315 Scanner scan = new Scanner(in, "UTF-8");
1316 int index = -1;
1317 scan.useDelimiter("\\n");
1318 while (scan.hasNext()) {
1319 lines.add(scan.next());
1320
1321 if (index == -1) {
1322 if (needle.startsWith("^")) {
1323 if (lines.get(lines.size() - 1).startsWith(
1324 needle.substring(1))) {
1325 index = lines.size() - 1;
1326 }
1327
1328 } else {
1329 if (lines.get(lines.size() - 1).contains(needle)) {
1330 index = lines.size() - 1;
1331 }
1332 }
1333 }
1334
1335 if (index >= 0 && index + relativeLine < lines.size()) {
1336 rep = lines.get(index + relativeLine);
1337 if (first) {
1338 break;
1339 }
1340 }
1341 }
1342
1343 return rep;
1344 }
1345 }