New support: YiffStar (still no logged-in content)
[fanfix.git] / src / be / nikiroo / fanfix / supported / BasicSupport.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.awt.image.BufferedImage;
4 import java.io.BufferedReader;
5 import java.io.ByteArrayInputStream;
6 import java.io.File;
7 import java.io.IOException;
8 import java.io.InputStream;
9 import java.io.InputStreamReader;
10 import java.net.MalformedURLException;
11 import java.net.URL;
12 import java.util.ArrayList;
13 import java.util.HashMap;
14 import java.util.List;
15 import java.util.Map;
16 import java.util.Map.Entry;
17 import java.util.Scanner;
18
19 import be.nikiroo.fanfix.Instance;
20 import be.nikiroo.fanfix.bundles.Config;
21 import be.nikiroo.fanfix.bundles.StringId;
22 import be.nikiroo.fanfix.data.Chapter;
23 import be.nikiroo.fanfix.data.MetaData;
24 import be.nikiroo.fanfix.data.Paragraph;
25 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
26 import be.nikiroo.fanfix.data.Story;
27 import be.nikiroo.utils.IOUtils;
28 import be.nikiroo.utils.Progress;
29 import be.nikiroo.utils.StringUtils;
30
31 /**
32 * This class is the base class used by the other support classes. It can be
33 * used outside of this package, and has static methods that you can use to
34 * get access to the correct support class.
35 * <p>
36 * It will be used with 'resources' (usually web pages or files).
37 *
38 * @author niki
39 */
40 public abstract class BasicSupport {
41 /**
42 * The supported input types for which we can get a {@link BasicSupport}
43 * object.
44 *
45 * @author niki
46 */
47 public enum SupportType {
48 /** EPUB files created with this program */
49 EPUB,
50 /** Pure text file with some rules */
51 TEXT,
52 /** TEXT but with associated .info file */
53 INFO_TEXT,
54 /** My Little Pony fanfictions */
55 FIMFICTION,
56 /** Fanfictions from a lot of different universes */
57 FANFICTION,
58 /** Website with lots of Mangas */
59 MANGAFOX,
60 /** Furry website with comics support */
61 E621,
62 /** Furry website with stories */
63 YIFFSTAR,
64 /** CBZ files */
65 CBZ,
66 /** HTML files */
67 HTML;
68
69 /**
70 * A description of this support type (more information than the
71 * {@link BasicSupport#getSourceName()}).
72 *
73 * @return the description
74 */
75 public String getDesc() {
76 String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
77 this.name());
78
79 if (desc == null) {
80 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
81 }
82
83 return desc;
84 }
85
86 /**
87 * The name of this support type (a short version).
88 *
89 * @return the name
90 */
91 public String getSourceName() {
92 BasicSupport support = BasicSupport.getSupport(this);
93 if (support != null) {
94 return support.getSourceName();
95 }
96
97 return null;
98 }
99
100 @Override
101 public String toString() {
102 return super.toString().toLowerCase();
103 }
104
105 /**
106 * Call {@link SupportType#valueOf(String.toUpperCase())}.
107 *
108 * @param typeName
109 * the possible type name
110 *
111 * @return NULL or the type
112 */
113 public static SupportType valueOfUC(String typeName) {
114 return SupportType.valueOf(typeName == null ? null : typeName
115 .toUpperCase());
116 }
117
118 /**
119 * Call {@link SupportType#valueOf(String.toUpperCase())} but return
120 * NULL for NULL instead of raising exception.
121 *
122 * @param typeName
123 * the possible type name
124 *
125 * @return NULL or the type
126 */
127 public static SupportType valueOfNullOkUC(String typeName) {
128 if (typeName == null) {
129 return null;
130 }
131
132 return SupportType.valueOfUC(typeName);
133 }
134
135 /**
136 * Call {@link SupportType#valueOf(String.toUpperCase())} but return
137 * NULL in case of error instead of raising an exception.
138 *
139 * @param typeName
140 * the possible type name
141 *
142 * @return NULL or the type
143 */
144 public static SupportType valueOfAllOkUC(String typeName) {
145 try {
146 return SupportType.valueOfUC(typeName);
147 } catch (Exception e) {
148 return null;
149 }
150 }
151 }
152
153 private InputStream in;
154 private SupportType type;
155 private URL currentReferer; // with only one 'r', as in 'HTTP'...
156
157 // quote chars
158 private char openQuote = Instance.getTrans().getChar(
159 StringId.OPEN_SINGLE_QUOTE);
160 private char closeQuote = Instance.getTrans().getChar(
161 StringId.CLOSE_SINGLE_QUOTE);
162 private char openDoubleQuote = Instance.getTrans().getChar(
163 StringId.OPEN_DOUBLE_QUOTE);
164 private char closeDoubleQuote = Instance.getTrans().getChar(
165 StringId.CLOSE_DOUBLE_QUOTE);
166
167 /**
168 * The name of this support class.
169 *
170 * @return the name
171 */
172 protected abstract String getSourceName();
173
174 /**
175 * Check if the given resource is supported by this {@link BasicSupport}.
176 *
177 * @param url
178 * the resource to check for
179 *
180 * @return TRUE if it is
181 */
182 protected abstract boolean supports(URL url);
183
184 /**
185 * Return TRUE if the support will return HTML encoded content values for
186 * the chapters content.
187 *
188 * @return TRUE for HTML
189 */
190 protected abstract boolean isHtml();
191
192 protected abstract MetaData getMeta(URL source, InputStream in)
193 throws IOException;
194
195 /**
196 * Return the story description.
197 *
198 * @param source
199 * the source of the story
200 * @param in
201 * the input (the main resource)
202 *
203 * @return the description
204 *
205 * @throws IOException
206 * in case of I/O error
207 */
208 protected abstract String getDesc(URL source, InputStream in)
209 throws IOException;
210
211 /**
212 * Return the list of chapters (name and resource).
213 *
214 * @param source
215 * the source of the story
216 * @param in
217 * the input (the main resource)
218 *
219 * @return the chapters
220 *
221 * @throws IOException
222 * in case of I/O error
223 */
224 protected abstract List<Entry<String, URL>> getChapters(URL source,
225 InputStream in) throws IOException;
226
227 /**
228 * Return the content of the chapter (possibly HTML encoded, if
229 * {@link BasicSupport#isHtml()} is TRUE).
230 *
231 * @param source
232 * the source of the story
233 * @param in
234 * the input (the main resource)
235 * @param number
236 * the chapter number
237 *
238 * @return the content
239 *
240 * @throws IOException
241 * in case of I/O error
242 */
243 protected abstract String getChapterContent(URL source, InputStream in,
244 int number) throws IOException;
245
246 /**
247 * Return the list of cookies (values included) that must be used to
248 * correctly fetch the resources.
249 * <p>
250 * You are expected to call the super method implementation if you override
251 * it.
252 *
253 * @return the cookies
254 */
255 public Map<String, String> getCookies() {
256 return new HashMap<String, String>();
257 }
258
259 /**
260 * Return the canonical form of the main {@link URL}.
261 *
262 * @param source
263 * the source {@link URL}
264 *
265 * @return the canonical form of this {@link URL}
266 *
267 * @throws IOException
268 * in case of I/O error
269 */
270 public URL getCanonicalUrl(URL source) throws IOException {
271 return source;
272 }
273
274 /**
275 * Process the given story resource into a partially filled {@link Story}
276 * object containing the name and metadata, except for the description.
277 *
278 * @param url
279 * the story resource
280 *
281 * @return the {@link Story}
282 *
283 * @throws IOException
284 * in case of I/O error
285 */
286 public Story processMeta(URL url) throws IOException {
287 return processMeta(url, true, false);
288 }
289
290 /**
291 * Process the given story resource into a partially filled {@link Story}
292 * object containing the name and metadata.
293 *
294 * @param url
295 * the story resource
296 *
297 * @param close
298 * close "this" and "in" when done
299 *
300 * @return the {@link Story}
301 *
302 * @throws IOException
303 * in case of I/O error
304 */
305 protected Story processMeta(URL url, boolean close, boolean getDesc)
306 throws IOException {
307 url = getCanonicalUrl(url);
308
309 setCurrentReferer(url);
310
311 in = openInput(url);
312 if (in == null) {
313 return null;
314 }
315
316 try {
317 preprocess(url, getInput());
318
319 Story story = new Story();
320 MetaData meta = getMeta(url, getInput());
321 story.setMeta(meta);
322
323 if (meta != null && meta.getCover() == null) {
324 meta.setCover(getDefaultCover(meta.getSubject()));
325 }
326
327 if (getDesc) {
328 String descChapterName = Instance.getTrans().getString(
329 StringId.DESCRIPTION);
330 story.getMeta().setResume(
331 makeChapter(url, 0, descChapterName,
332 getDesc(url, getInput())));
333 }
334
335 return story;
336 } finally {
337 if (close) {
338 try {
339 close();
340 } catch (IOException e) {
341 Instance.syserr(e);
342 }
343
344 if (in != null) {
345 in.close();
346 }
347 }
348
349 setCurrentReferer(null);
350 }
351 }
352
353 /**
354 * Process the given story resource into a fully filled {@link Story}
355 * object.
356 *
357 * @param url
358 * the story resource
359 * @param pg
360 * the optional progress reporter
361 *
362 * @return the {@link Story}
363 *
364 * @throws IOException
365 * in case of I/O error
366 */
367 public Story process(URL url, Progress pg) throws IOException {
368 if (pg == null) {
369 pg = new Progress();
370 } else {
371 pg.setMinMax(0, 100);
372 }
373
374 url = getCanonicalUrl(url);
375 pg.setProgress(1);
376 try {
377 Story story = processMeta(url, false, true);
378 pg.setProgress(10);
379 if (story == null) {
380 pg.setProgress(100);
381 return null;
382 }
383
384 setCurrentReferer(url);
385
386 story.setChapters(new ArrayList<Chapter>());
387
388 List<Entry<String, URL>> chapters = getChapters(url, getInput());
389 pg.setProgress(20);
390
391 int i = 1;
392 if (chapters != null) {
393 Progress pgChaps = new Progress(0, chapters.size());
394 pg.addProgress(pgChaps, 80);
395
396 for (Entry<String, URL> chap : chapters) {
397 setCurrentReferer(chap.getValue());
398 InputStream chapIn = Instance.getCache().open(
399 chap.getValue(), this, true);
400 try {
401 story.getChapters().add(
402 makeChapter(url, i, chap.getKey(),
403 getChapterContent(url, chapIn, i)));
404 } finally {
405 chapIn.close();
406 }
407
408 pgChaps.setProgress(i++);
409 }
410 } else {
411 pg.setProgress(100);
412 }
413
414 return story;
415
416 } finally {
417 try {
418 close();
419 } catch (IOException e) {
420 Instance.syserr(e);
421 }
422
423 if (in != null) {
424 in.close();
425 }
426
427 setCurrentReferer(null);
428 }
429 }
430
431 /**
432 * The support type.
433 *
434 * @return the type
435 */
436 public SupportType getType() {
437 return type;
438 }
439
440 /**
441 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
442 * the current {@link URL} we work on.
443 *
444 * @return the referer
445 */
446 public URL getCurrentReferer() {
447 return currentReferer;
448 }
449
450 /**
451 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
452 * the current {@link URL} we work on.
453 *
454 * @param currentReferer
455 * the new referer
456 */
457 protected void setCurrentReferer(URL currentReferer) {
458 this.currentReferer = currentReferer;
459 }
460
461 /**
462 * The support type.
463 *
464 * @param type
465 * the new type
466 *
467 * @return this
468 */
469 protected BasicSupport setType(SupportType type) {
470 this.type = type;
471 return this;
472 }
473
474 /**
475 * Prepare the support if needed before processing.
476 *
477 * @param source
478 * the source of the story
479 * @param in
480 * the input (the main resource)
481 *
482 * @throws IOException
483 * on I/O error
484 */
485 protected void preprocess(URL source, InputStream in) throws IOException {
486 }
487
488 /**
489 * Now that we have processed the {@link Story}, close the resources if any.
490 *
491 * @throws IOException
492 * on I/O error
493 */
494 protected void close() throws IOException {
495 }
496
497 /**
498 * Create a {@link Chapter} object from the given information, formatting
499 * the content as it should be.
500 *
501 * @param number
502 * the chapter number
503 * @param name
504 * the chapter name
505 * @param content
506 * the chapter content
507 *
508 * @return the {@link Chapter}
509 *
510 * @throws IOException
511 * in case of I/O error
512 */
513 protected Chapter makeChapter(URL source, int number, String name,
514 String content) throws IOException {
515 // Chapter name: process it correctly, then remove the possible
516 // redundant "Chapter x: " in front of it
517 String chapterName = processPara(name).getContent().trim();
518 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
519 .split(",")) {
520 String chapterWord = Instance.getConfig().getStringX(
521 Config.CHAPTER, lang);
522 if (chapterName.startsWith(chapterWord)) {
523 chapterName = chapterName.substring(chapterWord.length())
524 .trim();
525 break;
526 }
527 }
528
529 if (chapterName.startsWith(Integer.toString(number))) {
530 chapterName = chapterName.substring(
531 Integer.toString(number).length()).trim();
532 }
533
534 if (chapterName.startsWith(":")) {
535 chapterName = chapterName.substring(1).trim();
536 }
537 //
538
539 Chapter chap = new Chapter(number, chapterName);
540
541 if (content != null) {
542 chap.setParagraphs(makeParagraphs(source, content));
543 }
544
545 return chap;
546
547 }
548
549 /**
550 * Convert the given content into {@link Paragraph}s.
551 *
552 * @param source
553 * the source URL of the story
554 * @param content
555 * the textual content
556 *
557 * @return the {@link Paragraph}s
558 *
559 * @throws IOException
560 * in case of I/O error
561 */
562 protected List<Paragraph> makeParagraphs(URL source, String content)
563 throws IOException {
564 if (isHtml()) {
565 // Special <HR> processing:
566 content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
567 "\n* * *\n");
568 }
569
570 List<Paragraph> paras = new ArrayList<Paragraph>();
571 InputStream in = new ByteArrayInputStream(content.getBytes("UTF-8"));
572 try {
573 BufferedReader buff = new BufferedReader(new InputStreamReader(in,
574 "UTF-8"));
575
576 for (String encodedLine = buff.readLine(); encodedLine != null; encodedLine = buff
577 .readLine()) {
578 String lines[];
579 if (isHtml()) {
580 lines = encodedLine.split("(<p>|</p>|<br>|<br/>|\\n)");
581 } else {
582 lines = new String[] { encodedLine };
583 }
584
585 for (String aline : lines) {
586 String line = aline.trim();
587
588 URL image = null;
589 if (line.startsWith("[") && line.endsWith("]")) {
590 image = getImageUrl(this, source,
591 line.substring(1, line.length() - 1).trim());
592 }
593
594 if (image != null) {
595 paras.add(new Paragraph(image));
596 } else {
597 paras.add(processPara(line));
598 }
599 }
600 }
601 } finally {
602 in.close();
603 }
604
605 // Check quotes for "bad" format
606 List<Paragraph> newParas = new ArrayList<Paragraph>();
607 for (Paragraph para : paras) {
608 newParas.addAll(requotify(para));
609 }
610 paras = newParas;
611
612 // Remove double blanks/brks
613 fixBlanksBreaks(paras);
614
615 return paras;
616 }
617
618 /**
619 * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
620 * those {@link Paragraph}s.
621 * <p>
622 * The resulting list will not contain a starting or trailing blank/break
623 * nor 2 blanks or breaks following each other.
624 *
625 * @param paras
626 * the list of {@link Paragraph}s to fix
627 */
628 protected void fixBlanksBreaks(List<Paragraph> paras) {
629 boolean space = false;
630 boolean brk = true;
631 for (int i = 0; i < paras.size(); i++) {
632 Paragraph para = paras.get(i);
633 boolean thisSpace = para.getType() == ParagraphType.BLANK;
634 boolean thisBrk = para.getType() == ParagraphType.BREAK;
635
636 if (i > 0 && space && thisBrk) {
637 paras.remove(i - 1);
638 i--;
639 } else if ((space || brk) && (thisSpace || thisBrk)) {
640 paras.remove(i);
641 i--;
642 }
643
644 space = thisSpace;
645 brk = thisBrk;
646 }
647
648 // Remove blank/brk at start
649 if (paras.size() > 0
650 && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
651 0).getType() == ParagraphType.BREAK)) {
652 paras.remove(0);
653 }
654
655 // Remove blank/brk at end
656 int last = paras.size() - 1;
657 if (paras.size() > 0
658 && (paras.get(last).getType() == ParagraphType.BLANK || paras
659 .get(last).getType() == ParagraphType.BREAK)) {
660 paras.remove(last);
661 }
662 }
663
664 /**
665 * Get the default cover related to this subject (see <tt>.info</tt> files).
666 *
667 * @param subject
668 * the subject
669 *
670 * @return the cover if any, or NULL
671 */
672 static BufferedImage getDefaultCover(String subject) {
673 if (subject != null && !subject.isEmpty()
674 && Instance.getCoverDir() != null) {
675 try {
676 File fileCover = new File(Instance.getCoverDir(), subject);
677 return getImage(null, fileCover.toURI().toURL(), subject);
678 } catch (MalformedURLException e) {
679 }
680 }
681
682 return null;
683 }
684
685 /**
686 * Return the list of supported image extensions.
687 *
688 * @param emptyAllowed
689 * TRUE to allow an empty extension on first place, which can be
690 * used when you may already have an extension in your input but
691 * are not sure about it
692 *
693 * @return the extensions
694 */
695 static String[] getImageExt(boolean emptyAllowed) {
696 if (emptyAllowed) {
697 return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
698 } else {
699 return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
700 }
701 }
702
703 /**
704 * Check if the given resource can be a local image or a remote image, then
705 * refresh the cache with it if it is.
706 *
707 * @param source
708 * the story source
709 * @param line
710 * the resource to check
711 *
712 * @return the image if found, or NULL
713 *
714 */
715 static BufferedImage getImage(BasicSupport support, URL source, String line) {
716 URL url = getImageUrl(support, source, line);
717 if (url != null) {
718 InputStream in = null;
719 try {
720 in = Instance.getCache().open(url, getSupport(url), true);
721 return IOUtils.toImage(in);
722 } catch (IOException e) {
723 } finally {
724 if (in != null) {
725 try {
726 in.close();
727 } catch (IOException e) {
728 }
729 }
730 }
731 }
732
733 return null;
734 }
735
736 /**
737 * Check if the given resource can be a local image or a remote image, then
738 * refresh the cache with it if it is.
739 *
740 * @param source
741 * the story source
742 * @param line
743 * the resource to check
744 *
745 * @return the image URL if found, or NULL
746 *
747 */
748 static URL getImageUrl(BasicSupport support, URL source, String line) {
749 URL url = null;
750
751 if (line != null) {
752 // try for files
753 String path = null;
754 if (source != null) {
755 path = new File(source.getFile()).getParent();
756 try {
757 String basePath = new File(new File(path), line.trim())
758 .getAbsolutePath();
759 for (String ext : getImageExt(true)) {
760 if (new File(basePath + ext).exists()) {
761 url = new File(basePath + ext).toURI().toURL();
762 }
763 }
764 } catch (Exception e) {
765 // Nothing to do here
766 }
767 }
768
769 if (url == null) {
770 // try for URLs
771 try {
772 for (String ext : getImageExt(true)) {
773 if (Instance.getCache().check(new URL(line + ext))) {
774 url = new URL(line + ext);
775 break;
776 }
777 }
778
779 // try out of cache
780 if (url == null) {
781 for (String ext : getImageExt(true)) {
782 try {
783 url = new URL(line + ext);
784 Instance.getCache().refresh(url, support, true);
785 break;
786 } catch (IOException e) {
787 // no image with this ext
788 url = null;
789 }
790 }
791 }
792 } catch (MalformedURLException e) {
793 // Not an url
794 }
795 }
796
797 // refresh the cached file
798 if (url != null) {
799 try {
800 Instance.getCache().refresh(url, support, true);
801 } catch (IOException e) {
802 // woops, broken image
803 url = null;
804 }
805 }
806 }
807
808 return url;
809 }
810
811 /**
812 * Open the input file that will be used through the support.
813 *
814 * @param source
815 * the source {@link URL}
816 *
817 * @return the {@link InputStream}
818 *
819 * @throws IOException
820 * in case of I/O error
821 */
822 protected InputStream openInput(URL source) throws IOException {
823 return Instance.getCache().open(source, this, false);
824 }
825
826 /**
827 * Reset the given {@link InputStream} and return it.
828 *
829 * @param in
830 * the {@link InputStream} to reset
831 *
832 * @return the same {@link InputStream} after reset
833 */
834 protected InputStream reset(InputStream in) {
835 try {
836 in.reset();
837 } catch (IOException e) {
838 }
839 return in;
840 }
841
842 /**
843 * Reset then return {@link BasicSupport#in}.
844 *
845 * @return {@link BasicSupport#in}
846 */
847 protected InputStream getInput() {
848 return reset(in);
849 }
850
851 /**
852 * Fix the author name if it is prefixed with some "by" {@link String}.
853 *
854 * @param author
855 * the author with a possible prefix
856 *
857 * @return the author without prefixes
858 */
859 protected String fixAuthor(String author) {
860 if (author != null) {
861 for (String suffix : new String[] { " ", ":" }) {
862 for (String byString : Instance.getConfig()
863 .getString(Config.BYS).split(",")) {
864 byString += suffix;
865 if (author.toUpperCase().startsWith(byString.toUpperCase())) {
866 author = author.substring(byString.length()).trim();
867 }
868 }
869 }
870
871 // Special case (without suffix):
872 if (author.startsWith("©")) {
873 author = author.substring(1);
874 }
875 }
876
877 return author;
878 }
879
880 /**
881 * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
882 * and requotify them (i.e., separate them into QUOTE paragraphs and other
883 * paragraphs (quotes or not)).
884 *
885 * @param para
886 * the paragraph to requotify (not necessarily a quote)
887 *
888 * @return the correctly (or so we hope) quotified paragraphs
889 */
890 protected List<Paragraph> requotify(Paragraph para) {
891 List<Paragraph> newParas = new ArrayList<Paragraph>();
892
893 if (para.getType() == ParagraphType.QUOTE
894 && para.getContent().length() > 2) {
895 String line = para.getContent();
896 boolean singleQ = line.startsWith("" + openQuote);
897 boolean doubleQ = line.startsWith("" + openDoubleQuote);
898
899 // Do not try when more than one quote at a time
900 // (some stories are not easily readable if we do)
901 if (singleQ
902 && line.indexOf(closeQuote, 1) < line
903 .lastIndexOf(closeQuote)) {
904 newParas.add(para);
905 return newParas;
906 }
907 if (doubleQ
908 && line.indexOf(closeDoubleQuote, 1) < line
909 .lastIndexOf(closeDoubleQuote)) {
910 newParas.add(para);
911 return newParas;
912 }
913 //
914
915 if (!singleQ && !doubleQ) {
916 line = openDoubleQuote + line + closeDoubleQuote;
917 newParas.add(new Paragraph(ParagraphType.QUOTE, line));
918 } else {
919 char open = singleQ ? openQuote : openDoubleQuote;
920 char close = singleQ ? closeQuote : closeDoubleQuote;
921
922 int posDot = -1;
923 boolean inQuote = false;
924 int i = 0;
925 for (char car : line.toCharArray()) {
926 if (car == open) {
927 inQuote = true;
928 } else if (car == close) {
929 inQuote = false;
930 } else if (car == '.' && !inQuote) {
931 posDot = i;
932 break;
933 }
934 i++;
935 }
936
937 if (posDot >= 0) {
938 String rest = line.substring(posDot + 1).trim();
939 line = line.substring(0, posDot + 1).trim();
940 newParas.add(new Paragraph(ParagraphType.QUOTE, line));
941 if (!rest.isEmpty()) {
942 newParas.addAll(requotify(processPara(rest)));
943 }
944 } else {
945 newParas.add(para);
946 }
947 }
948 } else {
949 newParas.add(para);
950 }
951
952 return newParas;
953 }
954
955 /**
956 * Process a {@link Paragraph} from a raw line of text.
957 * <p>
958 * Will also fix quotes and HTML encoding if needed.
959 *
960 * @param line
961 * the raw line
962 *
963 * @return the processed {@link Paragraph}
964 */
965 protected Paragraph processPara(String line) {
966 line = ifUnhtml(line).trim();
967
968 boolean space = true;
969 boolean brk = true;
970 boolean quote = false;
971 boolean tentativeCloseQuote = false;
972 char prev = '\0';
973 int dashCount = 0;
974
975 StringBuilder builder = new StringBuilder();
976 for (char car : line.toCharArray()) {
977 if (car != '-') {
978 if (dashCount > 0) {
979 // dash, ndash and mdash: - – —
980 // currently: always use mdash
981 builder.append(dashCount == 1 ? '-' : '—');
982 }
983 dashCount = 0;
984 }
985
986 if (tentativeCloseQuote) {
987 tentativeCloseQuote = false;
988 if (Character.isLetterOrDigit(car)) {
989 builder.append("'");
990 } else {
991 // handle double-single quotes as double quotes
992 if (prev == car) {
993 builder.append(closeDoubleQuote);
994 continue;
995 } else {
996 builder.append(closeQuote);
997 }
998 }
999 }
1000
1001 switch (car) {
1002 case ' ': // note: unbreakable space
1003 case ' ':
1004 case '\t':
1005 case '\n': // just in case
1006 case '\r': // just in case
1007 builder.append(' ');
1008 break;
1009
1010 case '\'':
1011 if (space || (brk && quote)) {
1012 quote = true;
1013 // handle double-single quotes as double quotes
1014 if (prev == car) {
1015 builder.deleteCharAt(builder.length() - 1);
1016 builder.append(openDoubleQuote);
1017 } else {
1018 builder.append(openQuote);
1019 }
1020 } else if (prev == ' ' || prev == car) {
1021 // handle double-single quotes as double quotes
1022 if (prev == car) {
1023 builder.deleteCharAt(builder.length() - 1);
1024 builder.append(openDoubleQuote);
1025 } else {
1026 builder.append(openQuote);
1027 }
1028 } else {
1029 // it is a quote ("I'm off") or a 'quote' ("This
1030 // 'good' restaurant"...)
1031 tentativeCloseQuote = true;
1032 }
1033 break;
1034
1035 case '"':
1036 if (space || (brk && quote)) {
1037 quote = true;
1038 builder.append(openDoubleQuote);
1039 } else if (prev == ' ') {
1040 builder.append(openDoubleQuote);
1041 } else {
1042 builder.append(closeDoubleQuote);
1043 }
1044 break;
1045
1046 case '-':
1047 if (space) {
1048 quote = true;
1049 } else {
1050 dashCount++;
1051 }
1052 space = false;
1053 break;
1054
1055 case '*':
1056 case '~':
1057 case '/':
1058 case '\\':
1059 case '<':
1060 case '>':
1061 case '=':
1062 case '+':
1063 case '_':
1064 case '–':
1065 case '—':
1066 space = false;
1067 builder.append(car);
1068 break;
1069
1070 case '‘':
1071 case '`':
1072 case '‹':
1073 case '﹁':
1074 case '〈':
1075 case '「':
1076 if (space || (brk && quote)) {
1077 quote = true;
1078 builder.append(openQuote);
1079 } else {
1080 // handle double-single quotes as double quotes
1081 if (prev == car) {
1082 builder.deleteCharAt(builder.length() - 1);
1083 builder.append(openDoubleQuote);
1084 } else {
1085 builder.append(openQuote);
1086 }
1087 }
1088 space = false;
1089 brk = false;
1090 break;
1091
1092 case '’':
1093 case '›':
1094 case '﹂':
1095 case '〉':
1096 case '」':
1097 space = false;
1098 brk = false;
1099 // handle double-single quotes as double quotes
1100 if (prev == car) {
1101 builder.deleteCharAt(builder.length() - 1);
1102 builder.append(closeDoubleQuote);
1103 } else {
1104 builder.append(closeQuote);
1105 }
1106 break;
1107
1108 case '«':
1109 case '“':
1110 case '﹃':
1111 case '《':
1112 case '『':
1113 if (space || (brk && quote)) {
1114 quote = true;
1115 builder.append(openDoubleQuote);
1116 } else {
1117 builder.append(openDoubleQuote);
1118 }
1119 space = false;
1120 brk = false;
1121 break;
1122
1123 case '»':
1124 case '”':
1125 case '﹄':
1126 case '》':
1127 case '』':
1128 space = false;
1129 brk = false;
1130 builder.append(closeDoubleQuote);
1131 break;
1132
1133 default:
1134 space = false;
1135 brk = false;
1136 builder.append(car);
1137 break;
1138 }
1139
1140 prev = car;
1141 }
1142
1143 if (tentativeCloseQuote) {
1144 tentativeCloseQuote = false;
1145 builder.append(closeQuote);
1146 }
1147
1148 line = builder.toString().trim();
1149
1150 ParagraphType type = ParagraphType.NORMAL;
1151 if (space) {
1152 type = ParagraphType.BLANK;
1153 } else if (brk) {
1154 type = ParagraphType.BREAK;
1155 } else if (quote) {
1156 type = ParagraphType.QUOTE;
1157 }
1158
1159 return new Paragraph(type, line);
1160 }
1161
1162 /**
1163 * Remove the HTML from the input <b>if</b> {@link BasicSupport#isHtml()} is
1164 * true.
1165 *
1166 * @param input
1167 * the input
1168 *
1169 * @return the no html version if needed
1170 */
1171 private String ifUnhtml(String input) {
1172 if (isHtml() && input != null) {
1173 return StringUtils.unhtml(input);
1174 }
1175
1176 return input;
1177 }
1178
1179 /**
1180 * Return a {@link BasicSupport} implementation supporting the given
1181 * resource if possible.
1182 *
1183 * @param url
1184 * the story resource
1185 *
1186 * @return an implementation that supports it, or NULL
1187 */
1188 public static BasicSupport getSupport(URL url) {
1189 if (url == null) {
1190 return null;
1191 }
1192
1193 // TEXT and INFO_TEXT always support files (not URLs though)
1194 for (SupportType type : SupportType.values()) {
1195 if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1196 BasicSupport support = getSupport(type);
1197 if (support != null && support.supports(url)) {
1198 return support;
1199 }
1200 }
1201 }
1202
1203 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
1204 SupportType.TEXT }) {
1205 BasicSupport support = getSupport(type);
1206 if (support != null && support.supports(url)) {
1207 return support;
1208 }
1209 }
1210
1211 return null;
1212 }
1213
1214 /**
1215 * Return a {@link BasicSupport} implementation supporting the given type.
1216 *
1217 * @param type
1218 * the type
1219 *
1220 * @return an implementation that supports it, or NULL
1221 */
1222 public static BasicSupport getSupport(SupportType type) {
1223 switch (type) {
1224 case EPUB:
1225 return new Epub().setType(type);
1226 case INFO_TEXT:
1227 return new InfoText().setType(type);
1228 case FIMFICTION:
1229 return new Fimfiction().setType(type);
1230 case FANFICTION:
1231 return new Fanfiction().setType(type);
1232 case TEXT:
1233 return new Text().setType(type);
1234 case MANGAFOX:
1235 return new MangaFox().setType(type);
1236 case E621:
1237 return new E621().setType(type);
1238 case YIFFSTAR:
1239 return new YiffStar().setType(type);
1240 case CBZ:
1241 return new Cbz().setType(type);
1242 case HTML:
1243 return new Html().setType(type);
1244 }
1245
1246 return null;
1247 }
1248
1249 /**
1250 * Return the first line from the given input which correspond to the given
1251 * selectors.
1252 *
1253 * @param in
1254 * the input
1255 * @param needle
1256 * a string that must be found inside the target line (also
1257 * supports "^" at start to say "only if it starts with" the
1258 * needle)
1259 * @param relativeLine
1260 * the line to return based upon the target line position (-1 =
1261 * the line before, 0 = the target line...)
1262 *
1263 * @return the line
1264 */
1265 static String getLine(InputStream in, String needle, int relativeLine) {
1266 return getLine(in, needle, relativeLine, true);
1267 }
1268
1269 /**
1270 * Return a line from the given input which correspond to the given
1271 * selectors.
1272 *
1273 * @param in
1274 * the input
1275 * @param needle
1276 * a string that must be found inside the target line (also
1277 * supports "^" at start to say "only if it starts with" the
1278 * needle)
1279 * @param relativeLine
1280 * the line to return based upon the target line position (-1 =
1281 * the line before, 0 = the target line...)
1282 * @param first
1283 * takes the first result (as opposed to the last one, which will
1284 * also always spend the input)
1285 *
1286 * @return the line
1287 */
1288 static String getLine(InputStream in, String needle, int relativeLine,
1289 boolean first) {
1290 String rep = null;
1291
1292 try {
1293 in.reset();
1294 } catch (IOException e) {
1295 Instance.syserr(e);
1296 }
1297
1298 List<String> lines = new ArrayList<String>();
1299 @SuppressWarnings("resource")
1300 Scanner scan = new Scanner(in, "UTF-8");
1301 int index = -1;
1302 scan.useDelimiter("\\n");
1303 while (scan.hasNext()) {
1304 lines.add(scan.next());
1305
1306 if (index == -1) {
1307 if (needle.startsWith("^")) {
1308 if (lines.get(lines.size() - 1).startsWith(
1309 needle.substring(1))) {
1310 index = lines.size() - 1;
1311 }
1312
1313 } else {
1314 if (lines.get(lines.size() - 1).contains(needle)) {
1315 index = lines.size() - 1;
1316 }
1317 }
1318 }
1319
1320 if (index >= 0 && index + relativeLine < lines.size()) {
1321 rep = lines.get(index + relativeLine);
1322 if (first) {
1323 break;
1324 }
1325 }
1326 }
1327
1328 return rep;
1329 }
1330 }