b230bc3252e249caf57560b9c394cefa5fd135f8
[fanfix.git] / src / be / nikiroo / fanfix / supported / BasicSupport.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.awt.image.BufferedImage;
4 import java.io.BufferedReader;
5 import java.io.ByteArrayInputStream;
6 import java.io.File;
7 import java.io.IOException;
8 import java.io.InputStream;
9 import java.io.InputStreamReader;
10 import java.net.MalformedURLException;
11 import java.net.URL;
12 import java.util.ArrayList;
13 import java.util.Date;
14 import java.util.HashMap;
15 import java.util.List;
16 import java.util.Map;
17 import java.util.Map.Entry;
18 import java.util.Scanner;
19
20 import be.nikiroo.fanfix.Instance;
21 import be.nikiroo.fanfix.bundles.Config;
22 import be.nikiroo.fanfix.bundles.StringId;
23 import be.nikiroo.fanfix.data.Chapter;
24 import be.nikiroo.fanfix.data.MetaData;
25 import be.nikiroo.fanfix.data.Paragraph;
26 import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
27 import be.nikiroo.fanfix.data.Story;
28 import be.nikiroo.utils.IOUtils;
29 import be.nikiroo.utils.Progress;
30 import be.nikiroo.utils.StringUtils;
31
32 /**
33 * This class is the base class used by the other support classes. It can be
34 * used outside of this package, and has static methods that you can use to
35 * get access to the correct support class.
36 * <p>
37 * It will be used with 'resources' (usually web pages or files).
38 *
39 * @author niki
40 */
41 public abstract class BasicSupport {
42 /**
43 * The supported input types for which we can get a {@link BasicSupport}
44 * object.
45 *
46 * @author niki
47 */
48 public enum SupportType {
49 /** EPUB files created with this program */
50 EPUB,
51 /** Pure text file with some rules */
52 TEXT,
53 /** TEXT but with associated .info file */
54 INFO_TEXT,
55 /** My Little Pony fanfictions */
56 FIMFICTION,
57 /** Fanfictions from a lot of different universes */
58 FANFICTION,
59 /** Website with lots of Mangas */
60 MANGAFOX,
61 /** Furry website with comics support */
62 E621,
63 /** Furry website with stories */
64 YIFFSTAR,
65 /** CBZ files */
66 CBZ,
67 /** HTML files */
68 HTML;
69
70 /**
71 * A description of this support type (more information than the
72 * {@link BasicSupport#getSourceName()}).
73 *
74 * @return the description
75 */
76 public String getDesc() {
77 String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
78 this.name());
79
80 if (desc == null) {
81 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
82 }
83
84 return desc;
85 }
86
87 /**
88 * The name of this support type (a short version).
89 *
90 * @return the name
91 */
92 public String getSourceName() {
93 BasicSupport support = BasicSupport.getSupport(this);
94 if (support != null) {
95 return support.getSourceName();
96 }
97
98 return null;
99 }
100
101 @Override
102 public String toString() {
103 return super.toString().toLowerCase();
104 }
105
106 /**
107 * Call {@link SupportType#valueOf(String.toUpperCase())}.
108 *
109 * @param typeName
110 * the possible type name
111 *
112 * @return NULL or the type
113 */
114 public static SupportType valueOfUC(String typeName) {
115 return SupportType.valueOf(typeName == null ? null : typeName
116 .toUpperCase());
117 }
118
119 /**
120 * Call {@link SupportType#valueOf(String.toUpperCase())} but return
121 * NULL for NULL instead of raising exception.
122 *
123 * @param typeName
124 * the possible type name
125 *
126 * @return NULL or the type
127 */
128 public static SupportType valueOfNullOkUC(String typeName) {
129 if (typeName == null) {
130 return null;
131 }
132
133 return SupportType.valueOfUC(typeName);
134 }
135
136 /**
137 * Call {@link SupportType#valueOf(String.toUpperCase())} but return
138 * NULL in case of error instead of raising an exception.
139 *
140 * @param typeName
141 * the possible type name
142 *
143 * @return NULL or the type
144 */
145 public static SupportType valueOfAllOkUC(String typeName) {
146 try {
147 return SupportType.valueOfUC(typeName);
148 } catch (Exception e) {
149 return null;
150 }
151 }
152 }
153
// Main input of the story being processed: assigned from openInput(...) in
// processMeta(...), handed out (after a reset) by getInput(), closed at the
// end of process(...) / processMeta(...)
154 private InputStream in;
// Type of this support, set through setType(...)
155 private SupportType type;
156 private URL currentReferer; // with only one 'r', as in 'HTTP'...
157
158 // quote chars
// (read once from the translation bundle; used by processPara(...) and
// requotify(...) to normalise the quotes found in the text)
159 private char openQuote = Instance.getTrans().getCharacter(
160 StringId.OPEN_SINGLE_QUOTE);
161 private char closeQuote = Instance.getTrans().getCharacter(
162 StringId.CLOSE_SINGLE_QUOTE);
163 private char openDoubleQuote = Instance.getTrans().getCharacter(
164 StringId.OPEN_DOUBLE_QUOTE);
165 private char closeDoubleQuote = Instance.getTrans().getCharacter(
166 StringId.CLOSE_DOUBLE_QUOTE);
167
168 /**
169 * The name of this support class.
 * <p>
 * {@link SupportType#getSourceName()} delegates to this method to get the
 * short name of a type.
170 *
171 * @return the name
172 */
173 protected abstract String getSourceName();
174
175 /**
176 * Check if the given resource is supported by this {@link BasicSupport}.
 * <p>
 * {@link BasicSupport#getSupport(URL)} asks each candidate support in turn
 * and returns the first one that answers TRUE.
177 *
178 * @param url
179 * the resource to check for
180 *
181 * @return TRUE if it is
182 */
183 protected abstract boolean supports(URL url);
184
185 /**
186 * Return TRUE if the support will return HTML encoded content values for
187 * the chapters content.
 * <p>
 * When TRUE, the content is split into paragraphs on the paragraph and
 * line-break tags (instead of on line endings) and each line is un-HTML-ed
 * before being processed.
188 *
189 * @return TRUE for HTML
190 */
191 protected abstract boolean isHtml();
192
/**
 * Return the metadata of the story.
 * <p>
 * Called by {@link BasicSupport#processMeta(URL, boolean, boolean, Progress)},
 * which then fills in the creation date and the cover when they are missing.
 *
 * @param source
 *            the source of the story
 * @param in
 *            the input (the main resource)
 *
 * @return the metadata
 *
 * @throws IOException
 *             in case of I/O error
 */
193 protected abstract MetaData getMeta(URL source, InputStream in)
194 throws IOException;
195
196 /**
197 * Return the story description.
 * <p>
 * In this class, it is called when the metadata is processed with the
 * description requested; the returned text is then converted into chapter
 * number 0 and stored as the resume of the story.
198 *
199 * @param source
200 * the source of the story
201 * @param in
202 * the input (the main resource)
203 *
204 * @return the description
205 *
206 * @throws IOException
207 * in case of I/O error
208 */
209 protected abstract String getDesc(URL source, InputStream in)
210 throws IOException;
211
212 /**
213 * Return the list of chapters (name and resource).
 * <p>
 * Each entry key is used as the chapter name and each entry value is the
 * resource opened to read that chapter's content. The caller accepts a NULL
 * list (no chapter is then extracted).
214 *
215 * @param source
216 * the source of the story
217 * @param in
218 * the input (the main resource)
219 * @param pg
220 * the optional progress reporter
221 *
222 * @return the chapters
223 *
224 * @throws IOException
225 * in case of I/O error
226 */
227 protected abstract List<Entry<String, URL>> getChapters(URL source,
228 InputStream in, Progress pg) throws IOException;
229
230 /**
231 * Return the content of the chapter (possibly HTML encoded, if
232 * {@link BasicSupport#isHtml()} is TRUE).
233 *
234 * @param source
235 * the source of the story
236 * @param in
237 * the input of this chapter (the stream opened on the chapter resource, not the main resource)
238 * @param number
239 * the chapter number (the first chapter is number 1)
240 * @param pg
241 * the optional progress reporter
242 *
243 * @return the content
244 *
245 * @throws IOException
246 * in case of I/O error
247 */
248 protected abstract String getChapterContent(URL source, InputStream in,
249 int number, Progress pg) throws IOException;
250
251 /**
252 * Log into the support (can be a no-op depending upon the support).
253 *
254 * @throws IOException
255 * in case of I/O error
256 */
257 public void login() throws IOException {
258
259 }
260
261 /**
262 * Return the list of cookies (values included) that must be used to
263 * correctly fetch the resources.
264 * <p>
265 * You are expected to call the super method implementation if you override
266 * it.
267 *
268 * @return the cookies
269 *
270 * @throws IOException
271 * in case of I/O error
272 */
273 public Map<String, String> getCookies() throws IOException {
274 return new HashMap<String, String>();
275 }
276
277 /**
278 * Return the canonical form of the main {@link URL}.
279 *
280 * @param source
281 * the source {@link URL}
282 *
283 * @return the canonical form of this {@link URL}
284 *
285 * @throws IOException
286 * in case of I/O error
287 */
288 public URL getCanonicalUrl(URL source) throws IOException {
289 return source;
290 }
291
292 /**
293 * Process the given story resource into a partially filled {@link Story}
294 * object containing the name and metadata, except for the description.
295 *
296 * @param url
297 * the story resource
298 *
299 * @return the {@link Story}
300 *
301 * @throws IOException
302 * in case of I/O error
303 */
304 public Story processMeta(URL url) throws IOException {
305 return processMeta(url, true, false, null);
306 }
307
308 /**
309 * Process the given story resource into a partially filled {@link Story}
310 * object containing the name and metadata.
311 *
312 * @param url
313 * the story resource
314 *
315 * @param close
316 * close "this" and "in" when done
317 * @param pg
318 * the optional progress reporter
319 *
320 * @return the {@link Story}
321 *
322 * @throws IOException
323 * in case of I/O error
324 */
325 protected Story processMeta(URL url, boolean close, boolean getDesc,
326 Progress pg) throws IOException {
327 if (pg == null) {
328 pg = new Progress();
329 } else {
330 pg.setMinMax(0, 100);
331 }
332
333 login();
334 pg.setProgress(10);
335
336 url = getCanonicalUrl(url);
337
338 setCurrentReferer(url);
339
340 in = openInput(url);
341 if (in == null) {
342 return null;
343 }
344
345 try {
346 preprocess(url, getInput());
347 pg.setProgress(30);
348
349 Story story = new Story();
350 MetaData meta = getMeta(url, getInput());
351 if (meta.getCreationDate() == null
352 || meta.getCreationDate().isEmpty()) {
353 meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
354 }
355 story.setMeta(meta);
356
357 pg.setProgress(50);
358
359 if (meta != null && meta.getCover() == null) {
360 meta.setCover(getDefaultCover(meta.getSubject()));
361 }
362
363 pg.setProgress(60);
364
365 if (getDesc) {
366 String descChapterName = Instance.getTrans().getString(
367 StringId.DESCRIPTION);
368 story.getMeta().setResume(
369 makeChapter(url, 0, descChapterName,
370 getDesc(url, getInput()), null));
371 }
372
373 pg.setProgress(100);
374 return story;
375 } finally {
376 if (close) {
377 try {
378 close();
379 } catch (IOException e) {
380 Instance.syserr(e);
381 }
382
383 if (in != null) {
384 in.close();
385 }
386 }
387
388 setCurrentReferer(null);
389 }
390 }
391
392 /**
393 * Process the given story resource into a fully filled {@link Story}
394 * object.
395 *
396 * @param url
397 * the story resource
398 * @param pg
399 * the optional progress reporter
400 *
401 * @return the {@link Story}
402 *
403 * @throws IOException
404 * in case of I/O error
405 */
406 public Story process(URL url, Progress pg) throws IOException {
407 if (pg == null) {
408 pg = new Progress();
409 } else {
410 pg.setMinMax(0, 100);
411 }
412
413 url = getCanonicalUrl(url);
414 pg.setProgress(1);
415 try {
416 Progress pgMeta = new Progress();
417 pg.addProgress(pgMeta, 10);
418 Story story = processMeta(url, false, true, pgMeta);
419 if (!pgMeta.isDone()) {
420 pgMeta.setProgress(pgMeta.getMax()); // 10%
421 }
422
423 if (story == null) {
424 pg.setProgress(90);
425 return null;
426 }
427
428 pg.setName("Retrieving " + story.getMeta().getTitle());
429
430 setCurrentReferer(url);
431
432 Progress pgGetChapters = new Progress();
433 pg.addProgress(pgGetChapters, 10);
434 story.setChapters(new ArrayList<Chapter>());
435 List<Entry<String, URL>> chapters = getChapters(url, getInput(),
436 pgGetChapters);
437 if (!pgGetChapters.isDone()) {
438 pgGetChapters.setProgress(pgGetChapters.getMax()); // 20%
439 }
440
441 if (chapters != null) {
442 Progress pgChaps = new Progress("Extracting chapters", 0,
443 chapters.size() * 300);
444 pg.addProgress(pgChaps, 80);
445
446 long words = 0;
447 int i = 1;
448 for (Entry<String, URL> chap : chapters) {
449 pgChaps.setName("Extracting chapter " + i);
450 setCurrentReferer(chap.getValue());
451 InputStream chapIn = Instance.getCache().open(
452 chap.getValue(), this, true);
453 pgChaps.setProgress(i * 100);
454 try {
455 Progress pgGetChapterContent = new Progress();
456 Progress pgMakeChapter = new Progress();
457 pgChaps.addProgress(pgGetChapterContent, 100);
458 pgChaps.addProgress(pgMakeChapter, 100);
459
460 String content = getChapterContent(url, chapIn, i,
461 pgGetChapterContent);
462 if (!pgGetChapterContent.isDone()) {
463 pgGetChapterContent.setProgress(pgGetChapterContent
464 .getMax());
465 }
466
467 Chapter cc = makeChapter(url, i, chap.getKey(),
468 content, pgMakeChapter);
469 if (!pgMakeChapter.isDone()) {
470 pgMakeChapter.setProgress(pgMakeChapter.getMax());
471 }
472
473 words += cc.getWords();
474 story.getChapters().add(cc);
475 if (story.getMeta() != null) {
476 story.getMeta().setWords(words);
477 }
478 } finally {
479 chapIn.close();
480 }
481
482 i++;
483 }
484
485 pgChaps.setName("Extracting chapters");
486 } else {
487 pg.setProgress(80);
488 }
489
490 return story;
491
492 } finally {
493 try {
494 close();
495 } catch (IOException e) {
496 Instance.syserr(e);
497 }
498
499 if (in != null) {
500 in.close();
501 }
502
503 setCurrentReferer(null);
504 }
505 }
506
507 /**
508 * The support type.
509 *
510 * @return the type
511 */
512 public SupportType getType() {
513 return type;
514 }
515
516 /**
517 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
518 * the current {@link URL} we work on.
519 *
520 * @return the referer
521 */
522 public URL getCurrentReferer() {
523 return currentReferer;
524 }
525
526 /**
527 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
528 * the current {@link URL} we work on.
529 *
530 * @param currentReferer
531 * the new referer
532 */
533 protected void setCurrentReferer(URL currentReferer) {
534 this.currentReferer = currentReferer;
535 }
536
537 /**
538 * The support type.
539 *
540 * @param type
541 * the new type
542 *
543 * @return this
544 */
545 protected BasicSupport setType(SupportType type) {
546 this.type = type;
547 return this;
548 }
549
550 /**
551 * Prepare the support if needed before processing.
552 *
553 * @param source
554 * the source of the story
555 * @param in
556 * the input (the main resource)
557 *
558 * @throws IOException
559 * on I/O error
560 */
561 protected void preprocess(URL source, InputStream in) throws IOException {
562 }
563
564 /**
565 * Now that we have processed the {@link Story}, close the resources if any.
566 *
567 * @throws IOException
568 * on I/O error
569 */
570 protected void close() throws IOException {
571 }
572
573 /**
574 * Create a {@link Chapter} object from the given information, formatting
575 * the content as it should be.
576 *
577 * @param number
578 * the chapter number
579 * @param name
580 * the chapter name
581 * @param content
582 * the chapter content
583 * @param pg
584 * the optional progress reporter
585 *
586 * @return the {@link Chapter}
587 *
588 * @throws IOException
589 * in case of I/O error
590 */
591 protected Chapter makeChapter(URL source, int number, String name,
592 String content, Progress pg) throws IOException {
593 // Chapter name: process it correctly, then remove the possible
594 // redundant "Chapter x: " in front of it
595 String chapterName = processPara(name).getContent().trim();
596 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
597 .split(",")) {
598 String chapterWord = Instance.getConfig().getStringX(
599 Config.CHAPTER, lang);
600 if (chapterName.startsWith(chapterWord)) {
601 chapterName = chapterName.substring(chapterWord.length())
602 .trim();
603 break;
604 }
605 }
606
607 if (chapterName.startsWith(Integer.toString(number))) {
608 chapterName = chapterName.substring(
609 Integer.toString(number).length()).trim();
610 }
611
612 if (chapterName.startsWith(":")) {
613 chapterName = chapterName.substring(1).trim();
614 }
615 //
616
617 Chapter chap = new Chapter(number, chapterName);
618
619 if (content != null) {
620 List<Paragraph> paras = makeParagraphs(source, content, pg);
621 long words = 0;
622 for (Paragraph para : paras) {
623 words += para.getWords();
624 }
625 chap.setParagraphs(paras);
626 chap.setWords(words);
627 }
628
629 return chap;
630
631 }
632
633 /**
634 * Convert the given content into {@link Paragraph}s.
635 *
636 * @param source
637 * the source URL of the story
638 * @param content
639 * the textual content
640 * @param pg
641 * the optional progress reporter
642 *
643 * @return the {@link Paragraph}s
644 *
645 * @throws IOException
646 * in case of I/O error
647 */
648 protected List<Paragraph> makeParagraphs(URL source, String content,
649 Progress pg) throws IOException {
650 if (pg == null) {
651 pg = new Progress();
652 }
653
654 if (isHtml()) {
655 // Special <HR> processing:
656 content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
657 "<br/>* * *<br/>");
658 }
659
660 List<Paragraph> paras = new ArrayList<Paragraph>();
661
662 if (content != null && !content.trim().isEmpty()) {
663 if (isHtml()) {
664 String[] tab = content.split("(<p>|</p>|<br>|<br/>)");
665 pg.setMinMax(0, tab.length);
666 int i = 1;
667 for (String line : tab) {
668 if (line.startsWith("[") && line.endsWith("]")) {
669 pg.setName("Extracting image " + i);
670 }
671 paras.add(makeParagraph(source, line.trim()));
672 pg.setProgress(i++);
673 }
674 pg.setName(null);
675 } else {
676 List<String> lines = new ArrayList<String>();
677 BufferedReader buff = null;
678 try {
679 buff = new BufferedReader(
680 new InputStreamReader(new ByteArrayInputStream(
681 content.getBytes("UTF-8")), "UTF-8"));
682 for (String line = buff.readLine(); line != null; line = buff
683 .readLine()) {
684 lines.add(line.trim());
685 }
686 } finally {
687 if (buff != null) {
688 buff.close();
689 }
690 }
691
692 pg.setMinMax(0, lines.size());
693 int i = 0;
694 for (String line : lines) {
695 if (line.startsWith("[") && line.endsWith("]")) {
696 pg.setName("Extracting image " + i);
697 }
698 paras.add(makeParagraph(source, line));
699 pg.setProgress(i++);
700 }
701 pg.setName(null);
702 }
703
704 // Check quotes for "bad" format
705 List<Paragraph> newParas = new ArrayList<Paragraph>();
706 for (Paragraph para : paras) {
707 newParas.addAll(requotify(para));
708 }
709 paras = newParas;
710
711 // Remove double blanks/brks
712 fixBlanksBreaks(paras);
713 }
714
715 return paras;
716 }
717
718 /**
719 * Convert the given line into a single {@link Paragraph}.
720 *
721 * @param source
722 * the source URL of the story
723 * @param line
724 * the textual content of the paragraph
725 *
726 * @return the {@link Paragraph}
727 */
728 private Paragraph makeParagraph(URL source, String line) {
729 URL image = null;
730 if (line.startsWith("[") && line.endsWith("]")) {
731 image = getImageUrl(this, source,
732 line.substring(1, line.length() - 1).trim());
733 }
734
735 if (image != null) {
736 return new Paragraph(image);
737 } else {
738 return processPara(line);
739 }
740 }
741
742 /**
743 * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
744 * those {@link Paragraph}s.
745 * <p>
746 * The resulting list will not contain a starting or trailing blank/break
747 * nor 2 blanks or breaks following each other.
748 *
749 * @param paras
750 * the list of {@link Paragraph}s to fix
751 */
752 protected void fixBlanksBreaks(List<Paragraph> paras) {
753 boolean space = false;
754 boolean brk = true;
755 for (int i = 0; i < paras.size(); i++) {
756 Paragraph para = paras.get(i);
757 boolean thisSpace = para.getType() == ParagraphType.BLANK;
758 boolean thisBrk = para.getType() == ParagraphType.BREAK;
759
760 if (i > 0 && space && thisBrk) {
761 paras.remove(i - 1);
762 i--;
763 } else if ((space || brk) && (thisSpace || thisBrk)) {
764 paras.remove(i);
765 i--;
766 }
767
768 space = thisSpace;
769 brk = thisBrk;
770 }
771
772 // Remove blank/brk at start
773 if (paras.size() > 0
774 && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
775 0).getType() == ParagraphType.BREAK)) {
776 paras.remove(0);
777 }
778
779 // Remove blank/brk at end
780 int last = paras.size() - 1;
781 if (paras.size() > 0
782 && (paras.get(last).getType() == ParagraphType.BLANK || paras
783 .get(last).getType() == ParagraphType.BREAK)) {
784 paras.remove(last);
785 }
786 }
787
788 /**
789 * Get the default cover related to this subject (see <tt>.info</tt> files).
790 *
791 * @param subject
792 * the subject
793 *
794 * @return the cover if any, or NULL
795 */
796 static BufferedImage getDefaultCover(String subject) {
797 if (subject != null && !subject.isEmpty()
798 && Instance.getCoverDir() != null) {
799 try {
800 File fileCover = new File(Instance.getCoverDir(), subject);
801 return getImage(null, fileCover.toURI().toURL(), subject);
802 } catch (MalformedURLException e) {
803 }
804 }
805
806 return null;
807 }
808
809 /**
810 * Return the list of supported image extensions.
811 *
812 * @param emptyAllowed
813 * TRUE to allow an empty extension on first place, which can be
814 * used when you may already have an extension in your input but
815 * are not sure about it
816 *
817 * @return the extensions
818 */
819 static String[] getImageExt(boolean emptyAllowed) {
820 if (emptyAllowed) {
821 return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
822 } else {
823 return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
824 }
825 }
826
827 /**
828 * Check if the given resource can be a local image or a remote image, then
829 * refresh the cache with it if it is.
830 *
831 * @param source
832 * the story source
833 * @param line
834 * the resource to check
835 *
836 * @return the image if found, or NULL
837 *
838 */
839 static BufferedImage getImage(BasicSupport support, URL source, String line) {
840 URL url = getImageUrl(support, source, line);
841 if (url != null) {
842 InputStream in = null;
843 try {
844 in = Instance.getCache().open(url, getSupport(url), true);
845 return IOUtils.toImage(in);
846 } catch (IOException e) {
847 } finally {
848 if (in != null) {
849 try {
850 in.close();
851 } catch (IOException e) {
852 }
853 }
854 }
855 }
856
857 return null;
858 }
859
860 /**
861 * Check if the given resource can be a local image or a remote image, then
862 * refresh the cache with it if it is.
863 *
864 * @param source
865 * the story source
866 * @param line
867 * the resource to check
868 *
869 * @return the image URL if found, or NULL
870 *
871 */
872 static URL getImageUrl(BasicSupport support, URL source, String line) {
873 URL url = null;
874
875 if (line != null) {
876 // try for files
877 if (source != null) {
878 try {
879
880 String relPath = null;
881 String absPath = null;
882 try {
883 String path = new File(source.getFile()).getParent();
884 relPath = new File(new File(path), line.trim())
885 .getAbsolutePath();
886 } catch (Exception e) {
887 // Cannot be converted to path (one possibility to take
888 // into account: absolute path on Windows)
889 }
890 try {
891 absPath = new File(line.trim()).getAbsolutePath();
892 } catch (Exception e) {
893 // Cannot be converted to path (at all)
894 }
895
896 for (String ext : getImageExt(true)) {
897 if (absPath != null && new File(absPath + ext).exists()) {
898 url = new File(absPath + ext).toURI().toURL();
899 } else if (relPath != null
900 && new File(relPath + ext).exists()) {
901 url = new File(relPath + ext).toURI().toURL();
902 }
903 }
904 } catch (Exception e) {
905 // Should not happen since we control the correct arguments
906 }
907 }
908
909 if (url == null) {
910 // try for URLs
911 try {
912 for (String ext : getImageExt(true)) {
913 if (Instance.getCache().check(new URL(line + ext))) {
914 url = new URL(line + ext);
915 break;
916 }
917 }
918
919 // try out of cache
920 if (url == null) {
921 for (String ext : getImageExt(true)) {
922 try {
923 url = new URL(line + ext);
924 Instance.getCache().refresh(url, support, true);
925 break;
926 } catch (IOException e) {
927 // no image with this ext
928 url = null;
929 }
930 }
931 }
932 } catch (MalformedURLException e) {
933 // Not an url
934 }
935 }
936
937 // refresh the cached file
938 if (url != null) {
939 try {
940 Instance.getCache().refresh(url, support, true);
941 } catch (IOException e) {
942 // woops, broken image
943 url = null;
944 }
945 }
946 }
947
948 return url;
949 }
950
951 /**
952 * Open the input file that will be used through the support.
953 *
954 * @param source
955 * the source {@link URL}
956 *
957 * @return the {@link InputStream}
958 *
959 * @throws IOException
960 * in case of I/O error
961 */
962 protected InputStream openInput(URL source) throws IOException {
963 return Instance.getCache().open(source, this, false);
964 }
965
966 /**
967 * Reset the given {@link InputStream} and return it.
968 *
969 * @param in
970 * the {@link InputStream} to reset
971 *
972 * @return the same {@link InputStream} after reset
973 */
974 protected InputStream reset(InputStream in) {
975 try {
976 in.reset();
977 } catch (IOException e) {
978 }
979 return in;
980 }
981
982 /**
983 * Reset then return {@link BasicSupport#in}.
984 *
985 * @return {@link BasicSupport#in}
986 */
987 protected InputStream getInput() {
988 return reset(in);
989 }
990
991 /**
992 * Fix the author name if it is prefixed with some "by" {@link String}.
993 *
994 * @param author
995 * the author with a possible prefix
996 *
997 * @return the author without prefixes
998 */
999 protected String fixAuthor(String author) {
1000 if (author != null) {
1001 for (String suffix : new String[] { " ", ":" }) {
1002 for (String byString : Instance.getConfig()
1003 .getString(Config.BYS).split(",")) {
1004 byString += suffix;
1005 if (author.toUpperCase().startsWith(byString.toUpperCase())) {
1006 author = author.substring(byString.length()).trim();
1007 }
1008 }
1009 }
1010
1011 // Special case (without suffix):
1012 if (author.startsWith("©")) {
1013 author = author.substring(1);
1014 }
1015 }
1016
1017 return author;
1018 }
1019
1020 /**
1021 * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
1022 * and requotify them (i.e., separate them into QUOTE paragraphs and other
1023 * paragraphs (quotes or not)).
1024 *
1025 * @param para
1026 * the paragraph to requotify (not necessarily a quote)
1027 *
1028 * @return the correctly (or so we hope) quotified paragraphs
1029 */
1030 protected List<Paragraph> requotify(Paragraph para) {
1031 List<Paragraph> newParas = new ArrayList<Paragraph>();
1032
1033 if (para.getType() == ParagraphType.QUOTE
1034 && para.getContent().length() > 2) {
1035 String line = para.getContent();
1036 boolean singleQ = line.startsWith("" + openQuote);
1037 boolean doubleQ = line.startsWith("" + openDoubleQuote);
1038
1039 // Do not try when more than one quote at a time
1040 // (some stories are not easily readable if we do)
1041 if (singleQ
1042 && line.indexOf(closeQuote, 1) < line
1043 .lastIndexOf(closeQuote)) {
1044 newParas.add(para);
1045 return newParas;
1046 }
1047 if (doubleQ
1048 && line.indexOf(closeDoubleQuote, 1) < line
1049 .lastIndexOf(closeDoubleQuote)) {
1050 newParas.add(para);
1051 return newParas;
1052 }
1053 //
1054
1055 if (!singleQ && !doubleQ) {
1056 line = openDoubleQuote + line + closeDoubleQuote;
1057 newParas.add(new Paragraph(ParagraphType.QUOTE, line, para
1058 .getWords()));
1059 } else {
1060 char open = singleQ ? openQuote : openDoubleQuote;
1061 char close = singleQ ? closeQuote : closeDoubleQuote;
1062
1063 int posDot = -1;
1064 boolean inQuote = false;
1065 int i = 0;
1066 for (char car : line.toCharArray()) {
1067 if (car == open) {
1068 inQuote = true;
1069 } else if (car == close) {
1070 inQuote = false;
1071 } else if (car == '.' && !inQuote) {
1072 posDot = i;
1073 break;
1074 }
1075 i++;
1076 }
1077
1078 if (posDot >= 0) {
1079 String rest = line.substring(posDot + 1).trim();
1080 line = line.substring(0, posDot + 1).trim();
1081 long words = 1;
1082 for (char car : line.toCharArray()) {
1083 if (car == ' ') {
1084 words++;
1085 }
1086 }
1087 newParas.add(new Paragraph(ParagraphType.QUOTE, line, words));
1088 if (!rest.isEmpty()) {
1089 newParas.addAll(requotify(processPara(rest)));
1090 }
1091 } else {
1092 newParas.add(para);
1093 }
1094 }
1095 } else {
1096 newParas.add(para);
1097 }
1098
1099 return newParas;
1100 }
1101
1102 /**
1103 * Process a {@link Paragraph} from a raw line of text.
1104 * <p>
1105 * Will also fix quotes and HTML encoding if needed.
1106 *
1107 * @param line
1108 * the raw line
1109 *
1110 * @return the processed {@link Paragraph}
1111 */
1112 protected Paragraph processPara(String line) {
// NOTE(review): ifUnhtml(...) returns NULL for a NULL input, so a NULL line
// fails on the trim() below -- confirm that callers never pass NULL
1113 line = ifUnhtml(line).trim();
1114
// State of the scan:
// - space: TRUE until something else than whitespace or a straight quote is
//   seen (a line that keeps it TRUE is a BLANK)
// - brk: TRUE until a "content" character is seen; dashes and the listed
//   symbol characters do not reset it (a line that keeps it TRUE is a BREAK)
// - quote: TRUE if the line was detected as opening with a quote or a dash
// - tentativeCloseQuote: a straight single quote was seen and we need the
//   next character to decide between an apostrophe and a closing quote
// - prev: the previously processed character
// - dashCount: number of consecutive dashes not yet written to the output
// - words: starts at 1, incremented on each first whitespace after text
1115 boolean space = true;
1116 boolean brk = true;
1117 boolean quote = false;
1118 boolean tentativeCloseQuote = false;
1119 char prev = '\0';
1120 int dashCount = 0;
1121 long words = 1;
1122
1123 StringBuilder builder = new StringBuilder();
1124 for (char car : line.toCharArray()) {
// Pending dashes are written as soon as a non-dash character comes
// NOTE(review): they are only flushed here, so dashes ending the line do
// not seem to be written at all -- confirm this is intended
1125 if (car != '-') {
1126 if (dashCount > 0) {
1127 // dash, ndash and mdash: - – —
1128 // currently: a single dash is kept as-is, two or more become one mdash
1129 builder.append(dashCount == 1 ? '-' : '—');
1130 }
1131 dashCount = 0;
1132 }
1133
// Resolve the straight single quote seen on the previous character
1134 if (tentativeCloseQuote) {
1135 tentativeCloseQuote = false;
1136 if (Character.isLetterOrDigit(car)) {
// followed by a letter or digit: it was an apostrophe
1137 builder.append("'");
1138 } else {
1139 // handle double-single quotes as double quotes
1140 if (prev == car) {
1141 builder.append(closeDoubleQuote);
// note: this skips the switch and the update of prev for this character
1142 continue;
1143 } else {
1144 builder.append(closeQuote);
1145 }
1146 }
1147 }
1148
1149 switch (car) {
// Whitespace: written as a single space char each; does not change
// space/brk
1150 case ' ': // note: unbreakable space
1151 case ' ':
1152 case '\t':
1153 case '\n': // just in case
1154 case '\r': // just in case
1155 if (builder.length() > 0
1156 && builder.charAt(builder.length() - 1) != ' ') {
1157 words++;
1158 }
1159 builder.append(' ');
1160 break;
1161
// Straight single quote: opening quote at the start of the line or after a
// space, otherwise decided on the next character
1162 case '\'':
1163 if (space || (brk && quote)) {
1164 quote = true;
1165 // handle double-single quotes as double quotes
1166 if (prev == car) {
1167 builder.deleteCharAt(builder.length() - 1);
1168 builder.append(openDoubleQuote);
1169 } else {
1170 builder.append(openQuote);
1171 }
1172 } else if (prev == ' ' || prev == car) {
1173 // handle double-single quotes as double quotes
1174 if (prev == car) {
1175 builder.deleteCharAt(builder.length() - 1);
1176 builder.append(openDoubleQuote);
1177 } else {
1178 builder.append(openQuote);
1179 }
1180 } else {
1181 // it is a quote ("I'm off") or a 'quote' ("This
1182 // 'good' restaurant"...)
1183 tentativeCloseQuote = true;
1184 }
1185 break;
1186
// Straight double quote: opening at the start of the line or after a
// space, closing otherwise
1187 case '"':
1188 if (space || (brk && quote)) {
1189 quote = true;
1190 builder.append(openDoubleQuote);
1191 } else if (prev == ' ') {
1192 builder.append(openDoubleQuote);
1193 } else {
1194 builder.append(closeDoubleQuote);
1195 }
1196 break;
1197
// Dash: a dash at the very start marks the line as a quote and is not
// written; other dashes are counted and written later (see above)
1198 case '-':
1199 if (space) {
1200 quote = true;
1201 } else {
1202 dashCount++;
1203 }
1204 space = false;
1205 break;
1206
// Symbols: written as-is; the line is no longer blank but can still be a
// break
1207 case '*':
1208 case '~':
1209 case '/':
1210 case '\\':
1211 case '<':
1212 case '>':
1213 case '=':
1214 case '+':
1215 case '_':
1216 case '–':
1217 case '—':
1218 space = false;
1219 builder.append(car);
1220 break;
1221
// Typographic opening single quotes
1222 case '‘':
1223 case '`':
1224 case '‹':
1225 case '﹁':
1226 case '〈':
1227 case '「':
1228 if (space || (brk && quote)) {
1229 quote = true;
1230 builder.append(openQuote);
1231 } else {
1232 // handle double-single quotes as double quotes
1233 if (prev == car) {
1234 builder.deleteCharAt(builder.length() - 1);
1235 builder.append(openDoubleQuote);
1236 } else {
1237 builder.append(openQuote);
1238 }
1239 }
1240 space = false;
1241 brk = false;
1242 break;
1243
// Typographic closing single quotes
1244 case '’':
1245 case '›':
1246 case '﹂':
1247 case '〉':
1248 case '」':
1249 space = false;
1250 brk = false;
1251 // handle double-single quotes as double quotes
1252 if (prev == car) {
1253 builder.deleteCharAt(builder.length() - 1);
1254 builder.append(closeDoubleQuote);
1255 } else {
1256 builder.append(closeQuote);
1257 }
1258 break;
1259
// Typographic opening double quotes (both branches write the same
// character, only the quote flag differs)
1260 case '«':
1261 case '“':
1262 case '﹃':
1263 case '《':
1264 case '『':
1265 if (space || (brk && quote)) {
1266 quote = true;
1267 builder.append(openDoubleQuote);
1268 } else {
1269 builder.append(openDoubleQuote);
1270 }
1271 space = false;
1272 brk = false;
1273 break;
1274
// Typographic closing double quotes
1275 case '»':
1276 case '”':
1277 case '﹄':
1278 case '》':
1279 case '』':
1280 space = false;
1281 brk = false;
1282 builder.append(closeDoubleQuote);
1283 break;
1284
// Any other character is content
1285 default:
1286 space = false;
1287 brk = false;
1288 builder.append(car);
1289 break;
1290 }
1291
1292 prev = car;
1293 }
1294
// A straight single quote ending the line is a closing quote
1295 if (tentativeCloseQuote) {
1296 tentativeCloseQuote = false;
1297 builder.append(closeQuote);
1298 }
1299
1300 line = builder.toString().trim();
1301
// Type, by priority: BLANK, then BREAK, then QUOTE, else NORMAL
1302 ParagraphType type = ParagraphType.NORMAL;
1303 if (space) {
1304 type = ParagraphType.BLANK;
1305 } else if (brk) {
1306 type = ParagraphType.BREAK;
1307 } else if (quote) {
1308 type = ParagraphType.QUOTE;
1309 }
1310
1311 return new Paragraph(type, line, words);
1312 }
1313
1314 /**
1315 * Remove the HTML from the input <b>if</b> {@link BasicSupport#isHtml()} is
1316 * true.
1317 *
1318 * @param input
1319 * the input
1320 *
1321 * @return the no html version if needed
1322 */
1323 private String ifUnhtml(String input) {
1324 if (isHtml() && input != null) {
1325 return StringUtils.unhtml(input);
1326 }
1327
1328 return input;
1329 }
1330
1331 /**
1332 * Return a {@link BasicSupport} implementation supporting the given
1333 * resource if possible.
1334 *
1335 * @param url
1336 * the story resource
1337 *
1338 * @return an implementation that supports it, or NULL
1339 */
1340 public static BasicSupport getSupport(URL url) {
1341 if (url == null) {
1342 return null;
1343 }
1344
1345 // TEXT and INFO_TEXT always support files (not URLs though)
1346 for (SupportType type : SupportType.values()) {
1347 if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1348 BasicSupport support = getSupport(type);
1349 if (support != null && support.supports(url)) {
1350 return support;
1351 }
1352 }
1353 }
1354
1355 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
1356 SupportType.TEXT }) {
1357 BasicSupport support = getSupport(type);
1358 if (support != null && support.supports(url)) {
1359 return support;
1360 }
1361 }
1362
1363 return null;
1364 }
1365
1366 /**
1367 * Return a {@link BasicSupport} implementation supporting the given type.
1368 *
1369 * @param type
1370 * the type
1371 *
1372 * @return an implementation that supports it, or NULL
1373 */
1374 public static BasicSupport getSupport(SupportType type) {
1375 switch (type) {
1376 case EPUB:
1377 return new Epub().setType(type);
1378 case INFO_TEXT:
1379 return new InfoText().setType(type);
1380 case FIMFICTION:
1381 return new Fimfiction().setType(type);
1382 case FANFICTION:
1383 return new Fanfiction().setType(type);
1384 case TEXT:
1385 return new Text().setType(type);
1386 case MANGAFOX:
1387 return new MangaFox().setType(type);
1388 case E621:
1389 return new E621().setType(type);
1390 case YIFFSTAR:
1391 return new YiffStar().setType(type);
1392 case CBZ:
1393 return new Cbz().setType(type);
1394 case HTML:
1395 return new Html().setType(type);
1396 }
1397
1398 return null;
1399 }
1400
1401 /**
1402 * Return the first line from the given input which correspond to the given
1403 * selectors.
1404 *
1405 * @param in
1406 * the input
1407 * @param needle
1408 * a string that must be found inside the target line (also
1409 * supports "^" at start to say "only if it starts with" the
1410 * needle)
1411 * @param relativeLine
1412 * the line to return based upon the target line position (-1 =
1413 * the line before, 0 = the target line...)
1414 *
1415 * @return the line
1416 */
1417 static String getLine(InputStream in, String needle, int relativeLine) {
1418 return getLine(in, needle, relativeLine, true);
1419 }
1420
1421 /**
1422 * Return a line from the given input which correspond to the given
1423 * selectors.
1424 *
1425 * @param in
1426 * the input
1427 * @param needle
1428 * a string that must be found inside the target line (also
1429 * supports "^" at start to say "only if it starts with" the
1430 * needle)
1431 * @param relativeLine
1432 * the line to return based upon the target line position (-1 =
1433 * the line before, 0 = the target line...)
1434 * @param first
1435 * takes the first result (as opposed to the last one, which will
1436 * also always spend the input)
1437 *
1438 * @return the line
1439 */
1440 static String getLine(InputStream in, String needle, int relativeLine,
1441 boolean first) {
1442 String rep = null;
1443
1444 try {
1445 in.reset();
1446 } catch (IOException e) {
1447 Instance.syserr(e);
1448 }
1449
1450 List<String> lines = new ArrayList<String>();
1451 @SuppressWarnings("resource")
1452 Scanner scan = new Scanner(in, "UTF-8");
1453 int index = -1;
1454 scan.useDelimiter("\\n");
1455 while (scan.hasNext()) {
1456 lines.add(scan.next());
1457
1458 if (index == -1) {
1459 if (needle.startsWith("^")) {
1460 if (lines.get(lines.size() - 1).startsWith(
1461 needle.substring(1))) {
1462 index = lines.size() - 1;
1463 }
1464
1465 } else {
1466 if (lines.get(lines.size() - 1).contains(needle)) {
1467 index = lines.size() - 1;
1468 }
1469 }
1470 }
1471
1472 if (index >= 0 && index + relativeLine < lines.size()) {
1473 rep = lines.get(index + relativeLine);
1474 if (first) {
1475 break;
1476 }
1477 }
1478 }
1479
1480 return rep;
1481 }
1482 }