169cc5f02d7a0a85cc4328227bd646c408d5a8e1
[fanfix.git] / src / be / nikiroo / fanfix / supported / BasicSupport.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.awt.image.BufferedImage;
4 import java.io.BufferedReader;
5 import java.io.ByteArrayInputStream;
6 import java.io.File;
7 import java.io.IOException;
8 import java.io.InputStream;
9 import java.io.InputStreamReader;
10 import java.net.MalformedURLException;
11 import java.net.URL;
12 import java.util.ArrayList;
13 import java.util.HashMap;
14 import java.util.List;
15 import java.util.Map;
16 import java.util.Map.Entry;
17 import java.util.Scanner;
18
19 import be.nikiroo.fanfix.Instance;
20 import be.nikiroo.fanfix.bundles.Config;
21 import be.nikiroo.fanfix.bundles.StringId;
22 import be.nikiroo.fanfix.data.Chapter;
23 import be.nikiroo.fanfix.data.MetaData;
24 import be.nikiroo.fanfix.data.Paragraph;
25 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
26 import be.nikiroo.fanfix.data.Story;
27 import be.nikiroo.utils.IOUtils;
28 import be.nikiroo.utils.Progress;
29 import be.nikiroo.utils.StringUtils;
30
31 /**
32 * This class is the base class used by the other support classes. It can be
33 * used outside of this package, and has static methods that you can use to
34 * get access to the correct support class.
35 * <p>
36 * It will be used with 'resources' (usually web pages or files).
37 *
38 * @author niki
39 */
40 public abstract class BasicSupport {
41 /**
42 * The supported input types for which we can get a {@link BasicSupport}
43 * object.
44 *
45 * @author niki
46 */
47 public enum SupportType {
48 /** EPUB files created with this program */
49 EPUB,
50 /** Pure text file with some rules */
51 TEXT,
52 /** TEXT but with associated .info file */
53 INFO_TEXT,
54 /** My Little Pony fanfictions */
55 FIMFICTION,
56 /** Fanfictions from a lot of different universes */
57 FANFICTION,
58 /** Website with lots of Mangas */
59 MANGAFOX,
60 /** Furry website with comics support */
61 E621,
62 /** CBZ files */
63 CBZ,
64 /** HTML files */
65 HTML;
66
67 /**
68 * A description of this support type (more information than the
69 * {@link BasicSupport#getSourceName()}).
70 *
71 * @return the description
72 */
73 public String getDesc() {
74 String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
75 this.name());
76
77 if (desc == null) {
78 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
79 }
80
81 return desc;
82 }
83
84 /**
85 * The name of this support type (a short version).
86 *
87 * @return the name
88 */
89 public String getSourceName() {
90 BasicSupport support = BasicSupport.getSupport(this);
91 if (support != null) {
92 return support.getSourceName();
93 }
94
95 return null;
96 }
97
98 @Override
99 public String toString() {
100 return super.toString().toLowerCase();
101 }
102
103 /**
104 * Call {@link SupportType#valueOf(String.toUpperCase())}.
105 *
106 * @param typeName
107 * the possible type name
108 *
109 * @return NULL or the type
110 */
111 public static SupportType valueOfUC(String typeName) {
112 return SupportType.valueOf(typeName == null ? null : typeName
113 .toUpperCase());
114 }
115
116 /**
117 * Call {@link SupportType#valueOf(String.toUpperCase())} but return
118 * NULL for NULL instead of raising exception.
119 *
120 * @param typeName
121 * the possible type name
122 *
123 * @return NULL or the type
124 */
125 public static SupportType valueOfNullOkUC(String typeName) {
126 if (typeName == null) {
127 return null;
128 }
129
130 return SupportType.valueOfUC(typeName);
131 }
132
133 /**
134 * Call {@link SupportType#valueOf(String.toUpperCase())} but return
135 * NULL in case of error instead of raising an exception.
136 *
137 * @param typeName
138 * the possible type name
139 *
140 * @return NULL or the type
141 */
142 public static SupportType valueOfAllOkUC(String typeName) {
143 try {
144 return SupportType.valueOfUC(typeName);
145 } catch (Exception e) {
146 return null;
147 }
148 }
149 }
150
// Main input of the resource being processed: assigned by processMeta()
// from openInput() and handed out (after a reset) by getInput()
151 private InputStream in;
// Type of this support, as set by setType()
152 private SupportType type;
153 private URL currentReferer; // with only one 'r', as in 'HTTP'...
154
155 // quote chars
// (read from the translation bundle when the support is created; used by
// processPara() and requotify() to normalise the quotes)
156 private char openQuote = Instance.getTrans().getChar(
157 StringId.OPEN_SINGLE_QUOTE);
158 private char closeQuote = Instance.getTrans().getChar(
159 StringId.CLOSE_SINGLE_QUOTE);
160 private char openDoubleQuote = Instance.getTrans().getChar(
161 StringId.OPEN_DOUBLE_QUOTE);
162 private char closeDoubleQuote = Instance.getTrans().getChar(
163 StringId.CLOSE_DOUBLE_QUOTE);
164
165 /**
166 * The name of this support class.
167 *
168 * @return the name
169 */
170 protected abstract String getSourceName();
171
172 /**
173 * Check if the given resource is supported by this {@link BasicSupport}.
174 *
175 * @param url
176 * the resource to check for
177 *
178 * @return TRUE if it is
179 */
180 protected abstract boolean supports(URL url);
181
182 /**
183 * Return TRUE if the support will return HTML encoded content values for
184 * the chapters content.
185 *
186 * @return TRUE for HTML
187 */
188 protected abstract boolean isHtml();
189
/**
 * Return the metadata of the story.
 *
 * @param source
 *            the source of the story
 * @param in
 *            the input (the main resource)
 *
 * @return the metadata (the caller checks it against NULL before looking
 *         for a default cover)
 *
 * @throws IOException
 *             in case of I/O error
 */
190 protected abstract MetaData getMeta(URL source, InputStream in)
191 throws IOException;
192
193 /**
194 * Return the story description.
195 *
196 * @param source
197 * the source of the story
198 * @param in
199 * the input (the main resource)
200 *
201 * @return the description
202 *
203 * @throws IOException
204 * in case of I/O error
205 */
206 protected abstract String getDesc(URL source, InputStream in)
207 throws IOException;
208
209 /**
210 * Return the list of chapters (name and resource).
211 *
212 * @param source
213 * the source of the story
214 * @param in
215 * the input (the main resource)
216 *
217 * @return the chapters
218 *
219 * @throws IOException
220 * in case of I/O error
221 */
222 protected abstract List<Entry<String, URL>> getChapters(URL source,
223 InputStream in) throws IOException;
224
225 /**
226 * Return the content of the chapter (possibly HTML encoded, if
227 * {@link BasicSupport#isHtml()} is TRUE).
228 *
229 * @param source
230 * the source of the story
231 * @param in
232 * the input of the chapter (process() opens it from the chapter URL)
233 * @param number
234 * the chapter number
235 *
236 * @return the content
237 *
238 * @throws IOException
239 * in case of I/O error
240 */
241 protected abstract String getChapterContent(URL source, InputStream in,
242 int number) throws IOException;
243
244 /**
245 * Return the list of cookies (values included) that must be used to
246 * correctly fetch the resources.
247 * <p>
248 * You are expected to call the super method implementation if you override
249 * it.
250 *
251 * @return the cookies
252 */
253 public Map<String, String> getCookies() {
254 return new HashMap<String, String>();
255 }
256
257 /**
258 * Process the given story resource into a partially filled {@link Story}
259 * object containing the name and metadata, except for the description.
260 *
261 * @param url
262 * the story resource
263 *
264 * @return the {@link Story}
265 *
266 * @throws IOException
267 * in case of I/O error
268 */
269 public Story processMeta(URL url) throws IOException {
270 return processMeta(url, true, false);
271 }
272
273 /**
274 * Process the given story resource into a partially filled {@link Story}
275 * object containing the name and metadata.
276 *
277 * @param url
278 * the story resource
279 *
280 * @param close
281 * close "this" and "in" when done
282 *
283 * @return the {@link Story}
284 *
285 * @throws IOException
286 * in case of I/O error
287 */
288 protected Story processMeta(URL url, boolean close, boolean getDesc)
289 throws IOException {
290 in = openInput(url);
291 if (in == null) {
292 return null;
293 }
294
295 try {
296 preprocess(url, getInput());
297
298 Story story = new Story();
299 MetaData meta = getMeta(url, getInput());
300 story.setMeta(meta);
301
302 if (meta != null && meta.getCover() == null) {
303 meta.setCover(getDefaultCover(meta.getSubject()));
304 }
305
306 if (getDesc) {
307 String descChapterName = Instance.getTrans().getString(
308 StringId.DESCRIPTION);
309 story.getMeta().setResume(
310 makeChapter(url, 0, descChapterName,
311 getDesc(url, getInput())));
312 }
313
314 return story;
315 } finally {
316 if (close) {
317 try {
318 close();
319 } catch (IOException e) {
320 Instance.syserr(e);
321 }
322
323 if (in != null) {
324 in.close();
325 }
326 }
327 }
328 }
329
330 /**
331 * Process the given story resource into a fully filled {@link Story}
332 * object.
333 *
334 * @param url
335 * the story resource
336 * @param pg
337 * the optional progress reporter
338 *
339 * @return the {@link Story}
340 *
341 * @throws IOException
342 * in case of I/O error
343 */
344 public Story process(URL url, Progress pg) throws IOException {
345 if (pg == null) {
346 pg = new Progress();
347 } else {
348 pg.setMinMax(0, 100);
349 }
350
351 setCurrentReferer(url);
352
353 pg.setProgress(1);
354 try {
355 Story story = processMeta(url, false, true);
356 pg.setProgress(10);
357 if (story == null) {
358 pg.setProgress(100);
359 return null;
360 }
361
362 story.setChapters(new ArrayList<Chapter>());
363
364 List<Entry<String, URL>> chapters = getChapters(url, getInput());
365 pg.setProgress(20);
366
367 int i = 1;
368 if (chapters != null) {
369 Progress pgChaps = new Progress(0, chapters.size());
370 pg.addProgress(pgChaps, 80);
371
372 for (Entry<String, URL> chap : chapters) {
373 setCurrentReferer(chap.getValue());
374 InputStream chapIn = Instance.getCache().open(
375 chap.getValue(), this, true);
376 try {
377 story.getChapters().add(
378 makeChapter(url, i, chap.getKey(),
379 getChapterContent(url, chapIn, i)));
380 } finally {
381 chapIn.close();
382 }
383
384 pgChaps.setProgress(i++);
385 }
386 } else {
387 pg.setProgress(100);
388 }
389
390 return story;
391
392 } finally {
393 try {
394 close();
395 } catch (IOException e) {
396 Instance.syserr(e);
397 }
398
399 if (in != null) {
400 in.close();
401 }
402
403 currentReferer = null;
404 }
405 }
406
407 /**
408 * The support type (NULL until {@link BasicSupport#setType(SupportType)} is called).
409 *
410 * @return the type
411 */
412 public SupportType getType() {
413 return type;
414 }
415
416 /**
417 * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
418 * the current {@link URL} we work on.
 * <p>
 * It is reset to NULL at the end of process().
419 *
420 * @return the referer
421 */
422 public URL getCurrentReferer() {
423 return currentReferer;
424 }
425
426 /**
427 * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
428 * the current {@link URL} we work on.
429 *
430 * @param currentReferer
431 * the new referer
432 */
433 protected void setCurrentReferer(URL currentReferer) {
434 this.currentReferer = currentReferer;
435 }
436
437 /**
438 * The support type.
439 *
440 * @param type
441 * the new type
442 *
443 * @return this
444 */
445 protected BasicSupport setType(SupportType type) {
446 this.type = type;
447 return this;
448 }
449
450 /**
451 * Prepare the support if needed before processing.
 * <p>
 * Called by processMeta() right after the input has been opened; this
 * default implementation does nothing.
452 *
453 * @param source
454 * the source of the story
455 * @param in
456 * the input (the main resource)
457 *
458 * @throws IOException
459 * on I/O error
460 */
461 protected void preprocess(URL source, InputStream in) throws IOException {
// Nothing to prepare by default
462 }
463
464 /**
465 * Now that we have processed the {@link Story}, close the resources if any.
 * <p>
 * Called from the finally blocks of processMeta() (when asked to close)
 * and process(), which log an {@link IOException} instead of propagating
 * it; this default implementation does nothing.
466 *
467 * @throws IOException
468 * on I/O error
469 */
470 protected void close() throws IOException {
// Nothing to close by default
471 }
472
473 /**
474 * Create a {@link Chapter} object from the given information, formatting
475 * the content as it should be.
476 *
477 * @param number
478 * the chapter number
479 * @param name
480 * the chapter name
481 * @param content
482 * the chapter content
483 *
484 * @return the {@link Chapter}
485 *
486 * @throws IOException
487 * in case of I/O error
488 */
489 protected Chapter makeChapter(URL source, int number, String name,
490 String content) throws IOException {
491 // Chapter name: process it correctly, then remove the possible
492 // redundant "Chapter x: " in front of it
493 String chapterName = processPara(name).getContent().trim();
494 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
495 .split(",")) {
496 String chapterWord = Instance.getConfig().getStringX(
497 Config.CHAPTER, lang);
498 if (chapterName.startsWith(chapterWord)) {
499 chapterName = chapterName.substring(chapterWord.length())
500 .trim();
501 break;
502 }
503 }
504
505 if (chapterName.startsWith(Integer.toString(number))) {
506 chapterName = chapterName.substring(
507 Integer.toString(number).length()).trim();
508 }
509
510 if (chapterName.startsWith(":")) {
511 chapterName = chapterName.substring(1).trim();
512 }
513 //
514
515 Chapter chap = new Chapter(number, chapterName);
516
517 if (content != null) {
518 chap.setParagraphs(makeParagraphs(source, content));
519 }
520
521 return chap;
522
523 }
524
525 /**
526 * Convert the given content into {@link Paragraph}s.
527 *
528 * @param source
529 * the source URL of the story
530 * @param content
531 * the textual content
532 *
533 * @return the {@link Paragraph}s
534 *
535 * @throws IOException
536 * in case of I/O error
537 */
538 protected List<Paragraph> makeParagraphs(URL source, String content)
539 throws IOException {
540 if (isHtml()) {
541 // Special <HR> processing:
542 content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
543 "\n* * *\n");
544 }
545
546 List<Paragraph> paras = new ArrayList<Paragraph>();
547 InputStream in = new ByteArrayInputStream(content.getBytes("UTF-8"));
548 try {
549 BufferedReader buff = new BufferedReader(new InputStreamReader(in,
550 "UTF-8"));
551
552 for (String encodedLine = buff.readLine(); encodedLine != null; encodedLine = buff
553 .readLine()) {
554 String lines[];
555 if (isHtml()) {
556 lines = encodedLine.split("(<p>|</p>|<br>|<br/>|\\n)");
557 } else {
558 lines = new String[] { encodedLine };
559 }
560
561 for (String aline : lines) {
562 String line = aline.trim();
563
564 URL image = null;
565 if (line.startsWith("[") && line.endsWith("]")) {
566 image = getImageUrl(this, source,
567 line.substring(1, line.length() - 1).trim());
568 }
569
570 if (image != null) {
571 paras.add(new Paragraph(image));
572 } else {
573 paras.add(processPara(line));
574 }
575 }
576 }
577 } finally {
578 in.close();
579 }
580
581 // Check quotes for "bad" format
582 List<Paragraph> newParas = new ArrayList<Paragraph>();
583 for (Paragraph para : paras) {
584 newParas.addAll(requotify(para));
585 }
586 paras = newParas;
587
588 // Remove double blanks/brks
589 fixBlanksBreaks(paras);
590
591 return paras;
592 }
593
594 /**
595 * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
596 * those {@link Paragraph}s.
597 * <p>
598 * The resulting list will not contain a starting or trailing blank/break
599 * nor 2 blanks or breaks following each other.
600 *
601 * @param paras
602 * the list of {@link Paragraph}s to fix
603 */
604 protected void fixBlanksBreaks(List<Paragraph> paras) {
605 boolean space = false;
606 boolean brk = true;
607 for (int i = 0; i < paras.size(); i++) {
608 Paragraph para = paras.get(i);
609 boolean thisSpace = para.getType() == ParagraphType.BLANK;
610 boolean thisBrk = para.getType() == ParagraphType.BREAK;
611
612 if (i > 0 && space && thisBrk) {
613 paras.remove(i - 1);
614 i--;
615 } else if ((space || brk) && (thisSpace || thisBrk)) {
616 paras.remove(i);
617 i--;
618 }
619
620 space = thisSpace;
621 brk = thisBrk;
622 }
623
624 // Remove blank/brk at start
625 if (paras.size() > 0
626 && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
627 0).getType() == ParagraphType.BREAK)) {
628 paras.remove(0);
629 }
630
631 // Remove blank/brk at end
632 int last = paras.size() - 1;
633 if (paras.size() > 0
634 && (paras.get(last).getType() == ParagraphType.BLANK || paras
635 .get(last).getType() == ParagraphType.BREAK)) {
636 paras.remove(last);
637 }
638 }
639
640 /**
641 * Get the default cover related to this subject (see <tt>.info</tt> files).
642 *
643 * @param subject
644 * the subject
645 *
646 * @return the cover if any, or NULL
647 */
648 static BufferedImage getDefaultCover(String subject) {
649 if (subject != null && !subject.isEmpty()
650 && Instance.getCoverDir() != null) {
651 try {
652 File fileCover = new File(Instance.getCoverDir(), subject);
653 return getImage(null, fileCover.toURI().toURL(), subject);
654 } catch (MalformedURLException e) {
655 }
656 }
657
658 return null;
659 }
660
661 /**
662 * Return the list of supported image extensions.
663 *
664 * @return the extensions
665 */
666 static String[] getImageExt(boolean emptyAllowed) {
667 if (emptyAllowed) {
668 return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
669 } else {
670 return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
671 }
672 }
673
674 static BufferedImage getImage(BasicSupport support, URL source, String line) {
675 URL url = getImageUrl(support, source, line);
676 if (url != null) {
677 InputStream in = null;
678 try {
679 in = Instance.getCache().open(url, getSupport(url), true);
680 return IOUtils.toImage(in);
681 } catch (IOException e) {
682 } finally {
683 if (in != null) {
684 try {
685 in.close();
686 } catch (IOException e) {
687 }
688 }
689 }
690 }
691
692 return null;
693 }
694
695 /**
696 * Check if the given resource can be a local image or a remote image, then
697 * refresh the cache with it if it is.
 * <p>
 * Local files are tried first (relative to the parent directory of the
 * source), then URLs; in both cases, each extension returned by
 * getImageExt(true) is appended in turn.
698 *
 * @param support
 *            the support passed to the cache when refreshing the image
 *            (getDefaultCover() passes NULL through getImage())
699 * @param source
700 * the story source
701 * @param line
702 * the resource to check
703 *
704 * @return the image URL if found, or NULL
705 *
706 */
707 static URL getImageUrl(BasicSupport support, URL source, String line) {
708 URL url = null;
709
710 if (line != null) {
711 // try for files
712 String path = null;
713 if (source != null) {
714 path = new File(source.getFile()).getParent();
715 try {
716 String basePath = new File(new File(path), line.trim())
717 .getAbsolutePath();
// NOTE(review): there is no break here, so the last existing
// candidate wins (the URL loop below stops at the first one) --
// confirm this is intended
718 for (String ext : getImageExt(true)) {
719 if (new File(basePath + ext).exists()) {
720 url = new File(basePath + ext).toURI().toURL();
721 }
722 }
723 } catch (Exception e) {
724 // Nothing to do here
// (this also covers a NULL parent path)
725 }
726 }
727
728 if (url == null) {
729 // try for URLs
730 try {
// First pass: only accept what the cache check validates
731 for (String ext : getImageExt(true)) {
732 if (Instance.getCache().check(new URL(line + ext))) {
733 url = new URL(line + ext);
734 break;
735 }
736 }
737
738 // try out of cache
739 if (url == null) {
740 for (String ext : getImageExt(true)) {
741 try {
742 url = new URL(line + ext);
743 Instance.getCache().refresh(url, support, true);
744 break;
745 } catch (IOException e) {
746 // no image with this ext
747 url = null;
748 }
749 }
750 }
751 } catch (MalformedURLException e) {
752 // Not an url
753 }
754 }
755
756 // refresh the cached file
757 if (url != null) {
758 try {
759 Instance.getCache().refresh(url, support, true);
760 } catch (IOException e) {
761 // woops, broken image
762 url = null;
763 }
764 }
765 }
766
767 return url;
768 }
769
770 /**
771 * Open the input file that will be used through the support.
772 *
773 * @param source
774 * the source {@link URL}
775 *
776 * @return the {@link InputStream}
777 *
778 * @throws IOException
779 * in case of I/O error
780 */
781 protected InputStream openInput(URL source) throws IOException {
782 return Instance.getCache().open(source, this, false);
783 }
784
785 protected InputStream reset(InputStream in) {
786 try {
787 in.reset();
788 } catch (IOException e) {
789 }
790 return in;
791 }
792
793 /**
794 * Reset then return {@link BasicSupport#in}.
795 *
796 * @return {@link BasicSupport#in}
797 */
798 protected InputStream getInput() {
799 return reset(in);
800 }
801
802 /**
803 * Fix the author name if it is prefixed with some "by" {@link String}.
804 *
805 * @param author
806 * the author with a possible prefix
807 *
808 * @return the author without prefixes
809 */
810 protected String fixAuthor(String author) {
811 if (author != null) {
812 for (String suffix : new String[] { " ", ":" }) {
813 for (String byString : Instance.getConfig()
814 .getString(Config.BYS).split(",")) {
815 byString += suffix;
816 if (author.toUpperCase().startsWith(byString.toUpperCase())) {
817 author = author.substring(byString.length()).trim();
818 }
819 }
820 }
821
822 // Special case (without suffix):
823 if (author.startsWith("©")) {
824 author = author.substring(1);
825 }
826 }
827
828 return author;
829 }
830
831 /**
832 * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
833 * and requotify them (i.e., separate them into QUOTE paragraphs and other
834 * paragraphs (quotes or not)).
835 *
836 * @param para
837 * the paragraph to requotify (not necessaraly a quote)
838 *
839 * @return the correctly (or so we hope) quotified paragraphs
840 */
841 protected List<Paragraph> requotify(Paragraph para) {
842 List<Paragraph> newParas = new ArrayList<Paragraph>();
843
844 if (para.getType() == ParagraphType.QUOTE
845 && para.getContent().length() > 2) {
846 String line = para.getContent();
847 boolean singleQ = line.startsWith("" + openQuote);
848 boolean doubleQ = line.startsWith("" + openDoubleQuote);
849
850 // Do not try when more than one quote at a time
851 // (some stories are not easily readable if we do)
852 if (singleQ
853 && line.indexOf(closeQuote, 1) < line
854 .lastIndexOf(closeQuote)) {
855 newParas.add(para);
856 return newParas;
857 }
858 if (doubleQ
859 && line.indexOf(closeDoubleQuote, 1) < line
860 .lastIndexOf(closeDoubleQuote)) {
861 newParas.add(para);
862 return newParas;
863 }
864 //
865
866 if (!singleQ && !doubleQ) {
867 line = openDoubleQuote + line + closeDoubleQuote;
868 newParas.add(new Paragraph(ParagraphType.QUOTE, line));
869 } else {
870 char open = singleQ ? openQuote : openDoubleQuote;
871 char close = singleQ ? closeQuote : closeDoubleQuote;
872
873 int posDot = -1;
874 boolean inQuote = false;
875 int i = 0;
876 for (char car : line.toCharArray()) {
877 if (car == open) {
878 inQuote = true;
879 } else if (car == close) {
880 inQuote = false;
881 } else if (car == '.' && !inQuote) {
882 posDot = i;
883 break;
884 }
885 i++;
886 }
887
888 if (posDot >= 0) {
889 String rest = line.substring(posDot + 1).trim();
890 line = line.substring(0, posDot + 1).trim();
891 newParas.add(new Paragraph(ParagraphType.QUOTE, line));
892 if (!rest.isEmpty()) {
893 newParas.addAll(requotify(processPara(rest)));
894 }
895 } else {
896 newParas.add(para);
897 }
898 }
899 } else {
900 newParas.add(para);
901 }
902
903 return newParas;
904 }
905
906 /**
907 * Process a {@link Paragraph} from a raw line of text.
908 * <p>
909 * Will also fix quotes and HTML encoding if needed.
910 *
911 * @param line
912 * the raw line
913 *
914 * @return the processed {@link Paragraph}
915 */
916 private Paragraph processPara(String line) {
917 line = ifUnhtml(line).trim();
918
919 boolean space = true;
920 boolean brk = true;
921 boolean quote = false;
922 boolean tentativeCloseQuote = false;
923 char prev = '\0';
924 int dashCount = 0;
925
926 StringBuilder builder = new StringBuilder();
927 for (char car : line.toCharArray()) {
928 if (car != '-') {
929 if (dashCount > 0) {
930 // dash, ndash and mdash: - – —
931 // currently: always use mdash
932 builder.append(dashCount == 1 ? '-' : '—');
933 }
934 dashCount = 0;
935 }
936
937 if (tentativeCloseQuote) {
938 tentativeCloseQuote = false;
939 if ((car >= 'a' && car <= 'z') || (car >= 'A' && car <= 'Z')
940 || (car >= '0' && car <= '9')) {
941 builder.append("'");
942 } else {
943 builder.append(closeQuote);
944 }
945 }
946
947 switch (car) {
948 case ' ': // note: unbreakable space
949 case ' ':
950 case '\t':
951 case '\n': // just in case
952 case '\r': // just in case
953 builder.append(' ');
954 break;
955
956 case '\'':
957 if (space || (brk && quote)) {
958 quote = true;
959 builder.append(openQuote);
960 } else if (prev == ' ') {
961 builder.append(openQuote);
962 } else {
963 // it is a quote ("I'm off") or a 'quote' ("This
964 // 'good' restaurant"...)
965 tentativeCloseQuote = true;
966 }
967 break;
968
969 case '"':
970 if (space || (brk && quote)) {
971 quote = true;
972 builder.append(openDoubleQuote);
973 } else if (prev == ' ') {
974 builder.append(openDoubleQuote);
975 } else {
976 builder.append(closeDoubleQuote);
977 }
978 break;
979
980 case '-':
981 if (space) {
982 quote = true;
983 } else {
984 dashCount++;
985 }
986 space = false;
987 break;
988
989 case '*':
990 case '~':
991 case '/':
992 case '\\':
993 case '<':
994 case '>':
995 case '=':
996 case '+':
997 case '_':
998 case '–':
999 case '—':
1000 space = false;
1001 builder.append(car);
1002 break;
1003
1004 case '‘':
1005 case '`':
1006 case '‹':
1007 case '﹁':
1008 case '〈':
1009 case '「':
1010 if (space || (brk && quote)) {
1011 quote = true;
1012 builder.append(openQuote);
1013 } else {
1014 builder.append(openQuote);
1015 }
1016 space = false;
1017 brk = false;
1018 break;
1019
1020 case '’':
1021 case '›':
1022 case '﹂':
1023 case '〉':
1024 case '」':
1025 space = false;
1026 brk = false;
1027 builder.append(closeQuote);
1028 break;
1029
1030 case '«':
1031 case '“':
1032 case '﹃':
1033 case '《':
1034 case '『':
1035 if (space || (brk && quote)) {
1036 quote = true;
1037 builder.append(openDoubleQuote);
1038 } else {
1039 builder.append(openDoubleQuote);
1040 }
1041 space = false;
1042 brk = false;
1043 break;
1044
1045 case '»':
1046 case '”':
1047 case '﹄':
1048 case '》':
1049 case '』':
1050 space = false;
1051 brk = false;
1052 builder.append(closeDoubleQuote);
1053 break;
1054
1055 default:
1056 space = false;
1057 brk = false;
1058 builder.append(car);
1059 break;
1060 }
1061
1062 prev = car;
1063 }
1064
1065 if (tentativeCloseQuote) {
1066 tentativeCloseQuote = false;
1067 builder.append(closeQuote);
1068 }
1069
1070 line = builder.toString().trim();
1071
1072 ParagraphType type = ParagraphType.NORMAL;
1073 if (space) {
1074 type = ParagraphType.BLANK;
1075 } else if (brk) {
1076 type = ParagraphType.BREAK;
1077 } else if (quote) {
1078 type = ParagraphType.QUOTE;
1079 }
1080
1081 return new Paragraph(type, line);
1082 }
1083
1084 /**
1085 * Remove the HTML from the inpit <b>if</b> {@link BasicSupport#isHtml()} is
1086 * true.
1087 *
1088 * @param input
1089 * the input
1090 *
1091 * @return the no html version if needed
1092 */
1093 private String ifUnhtml(String input) {
1094 if (isHtml() && input != null) {
1095 return StringUtils.unhtml(input);
1096 }
1097
1098 return input;
1099 }
1100
1101 /**
1102 * Return a {@link BasicSupport} implementation supporting the given
1103 * resource if possible.
1104 *
1105 * @param url
1106 * the story resource
1107 *
1108 * @return an implementation that supports it, or NULL
1109 */
1110 public static BasicSupport getSupport(URL url) {
1111 if (url == null) {
1112 return null;
1113 }
1114
1115 // TEXT and INFO_TEXT always support files (not URLs though)
1116 for (SupportType type : SupportType.values()) {
1117 if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1118 BasicSupport support = getSupport(type);
1119 if (support != null && support.supports(url)) {
1120 return support;
1121 }
1122 }
1123 }
1124
1125 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
1126 SupportType.TEXT }) {
1127 BasicSupport support = getSupport(type);
1128 if (support != null && support.supports(url)) {
1129 return support;
1130 }
1131 }
1132
1133 return null;
1134 }
1135
1136 /**
1137 * Return a {@link BasicSupport} implementation supporting the given type.
1138 *
1139 * @param type
1140 * the type
1141 *
1142 * @return an implementation that supports it, or NULL
1143 */
1144 public static BasicSupport getSupport(SupportType type) {
1145 switch (type) {
1146 case EPUB:
1147 return new Epub().setType(type);
1148 case INFO_TEXT:
1149 return new InfoText().setType(type);
1150 case FIMFICTION:
1151 return new Fimfiction().setType(type);
1152 case FANFICTION:
1153 return new Fanfiction().setType(type);
1154 case TEXT:
1155 return new Text().setType(type);
1156 case MANGAFOX:
1157 return new MangaFox().setType(type);
1158 case E621:
1159 return new E621().setType(type);
1160 case CBZ:
1161 return new Cbz().setType(type);
1162 case HTML:
1163 return new Html().setType(type);
1164 }
1165
1166 return null;
1167 }
1168
1169 /**
1170 * Return the first line from the given input which correspond to the given
1171 * selectors.
1172 *
1173 * @param in
1174 * the input
1175 * @param needle
1176 * a string that must be found inside the target line (also
1177 * supports "^" at start to say "only if it starts with" the
1178 * needle)
1179 * @param relativeLine
1180 * the line to return based upon the target line position (-1 =
1181 * the line before, 0 = the target line...)
1182 *
1183 * @return the line
1184 */
1185 static String getLine(InputStream in, String needle, int relativeLine) {
1186 return getLine(in, needle, relativeLine, true);
1187 }
1188
1189 /**
1190 * Return a line from the given input which correspond to the given
1191 * selectors.
1192 *
1193 * @param in
1194 * the input
1195 * @param needle
1196 * a string that must be found inside the target line (also
1197 * supports "^" at start to say "only if it starts with" the
1198 * needle)
1199 * @param relativeLine
1200 * the line to return based upon the target line position (-1 =
1201 * the line before, 0 = the target line...)
1202 * @param first
1203 * takes the first result (as opposed to the last one, which will
1204 * also always spend the input)
1205 *
1206 * @return the line
1207 */
1208 static String getLine(InputStream in, String needle, int relativeLine,
1209 boolean first) {
1210 String rep = null;
1211
1212 try {
1213 in.reset();
1214 } catch (IOException e) {
1215 Instance.syserr(e);
1216 }
1217
1218 List<String> lines = new ArrayList<String>();
1219 @SuppressWarnings("resource")
1220 Scanner scan = new Scanner(in, "UTF-8");
1221 int index = -1;
1222 scan.useDelimiter("\\n");
1223 while (scan.hasNext()) {
1224 lines.add(scan.next());
1225
1226 if (index == -1) {
1227 if (needle.startsWith("^")) {
1228 if (lines.get(lines.size() - 1).startsWith(
1229 needle.substring(1))) {
1230 index = lines.size() - 1;
1231 }
1232
1233 } else {
1234 if (lines.get(lines.size() - 1).contains(needle)) {
1235 index = lines.size() - 1;
1236 }
1237 }
1238 }
1239
1240 if (index >= 0 && index + relativeLine < lines.size()) {
1241 rep = lines.get(index + relativeLine);
1242 if (first) {
1243 break;
1244 }
1245 }
1246 }
1247
1248 return rep;
1249 }
1250 }