Version 1.2.4: fixes, new "Re-download" UI option
[fanfix.git] / src / be / nikiroo / fanfix / supported / BasicSupport.java
CommitLineData
08fe2e33
NR
1package be.nikiroo.fanfix.supported;
2
68686a37 3import java.awt.image.BufferedImage;
68e370a4 4import java.io.BufferedReader;
08fe2e33
NR
5import java.io.ByteArrayInputStream;
6import java.io.File;
7import java.io.IOException;
8import java.io.InputStream;
68e370a4 9import java.io.InputStreamReader;
08fe2e33
NR
10import java.net.MalformedURLException;
11import java.net.URL;
08fe2e33
NR
12import java.util.ArrayList;
13import java.util.HashMap;
14import java.util.List;
15import java.util.Map;
16import java.util.Map.Entry;
17import java.util.Scanner;
18
19import be.nikiroo.fanfix.Instance;
20import be.nikiroo.fanfix.bundles.Config;
21import be.nikiroo.fanfix.bundles.StringId;
22import be.nikiroo.fanfix.data.Chapter;
23import be.nikiroo.fanfix.data.MetaData;
24import be.nikiroo.fanfix.data.Paragraph;
08fe2e33 25import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
9252c65e 26import be.nikiroo.fanfix.data.Story;
595dfa7a 27import be.nikiroo.utils.IOUtils;
3b2b638f 28import be.nikiroo.utils.Progress;
08fe2e33
NR
29import be.nikiroo.utils.StringUtils;
30
31/**
32 * This class is the base class used by the other support classes. It can be
33 * used outside of this package, and have static method that you can use to get
34 * access to the correct support class.
35 * <p>
36 * It will be used with 'resources' (usually web pages or files).
37 *
38 * @author niki
39 */
40public abstract class BasicSupport {
41 /**
42 * The supported input types for which we can get a {@link BasicSupport}
43 * object.
44 *
45 * @author niki
46 */
47 public enum SupportType {
48 /** EPUB files created with this program */
49 EPUB,
50 /** Pure text file with some rules */
51 TEXT,
52 /** TEXT but with associated .info file */
53 INFO_TEXT,
54 /** My Little Pony fanfictions */
55 FIMFICTION,
56 /** Fanfictions from a lot of different universes */
57 FANFICTION,
58 /** Website with lots of Mangas */
59 MANGAFOX,
60 /** Furry website with comics support */
61 E621,
62 /** CBZ files */
373da363
NR
63 CBZ,
64 /** HTML files */
65 HTML;
08fe2e33
NR
66
67 /**
68 * A description of this support type (more information than the
69 * {@link BasicSupport#getSourceName()}).
70 *
71 * @return the description
72 */
73 public String getDesc() {
74 String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
75 this.name());
76
77 if (desc == null) {
78 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
79 }
80
81 return desc;
82 }
83
84 /**
85 * The name of this support type (a short version).
86 *
87 * @return the name
88 */
89 public String getSourceName() {
90 BasicSupport support = BasicSupport.getSupport(this);
91 if (support != null) {
92 return support.getSourceName();
93 }
94
95 return null;
96 }
97
98 @Override
99 public String toString() {
100 return super.toString().toLowerCase();
101 }
102
103 /**
104 * Call {@link SupportType#valueOf(String.toUpperCase())}.
105 *
106 * @param typeName
107 * the possible type name
108 *
109 * @return NULL or the type
110 */
111 public static SupportType valueOfUC(String typeName) {
112 return SupportType.valueOf(typeName == null ? null : typeName
113 .toUpperCase());
114 }
115
116 /**
117 * Call {@link SupportType#valueOf(String.toUpperCase())} but return
118 * NULL for NULL instead of raising exception.
119 *
120 * @param typeName
121 * the possible type name
122 *
123 * @return NULL or the type
124 */
125 public static SupportType valueOfNullOkUC(String typeName) {
126 if (typeName == null) {
127 return null;
128 }
129
130 return SupportType.valueOfUC(typeName);
131 }
132
133 /**
134 * Call {@link SupportType#valueOf(String.toUpperCase())} but return
135 * NULL in case of error instead of raising an exception.
136 *
137 * @param typeName
138 * the possible type name
139 *
140 * @return NULL or the type
141 */
142 public static SupportType valueOfAllOkUC(String typeName) {
143 try {
144 return SupportType.valueOfUC(typeName);
145 } catch (Exception e) {
146 return null;
147 }
148 }
149 }
150
08fe2e33
NR
151 private InputStream in;
152 private SupportType type;
22848428 153 private URL currentReferer; // with only one 'r', as in 'HTTP'...
08fe2e33
NR
154
155 // quote chars
156 private char openQuote = Instance.getTrans().getChar(
157 StringId.OPEN_SINGLE_QUOTE);
158 private char closeQuote = Instance.getTrans().getChar(
159 StringId.CLOSE_SINGLE_QUOTE);
160 private char openDoubleQuote = Instance.getTrans().getChar(
161 StringId.OPEN_DOUBLE_QUOTE);
162 private char closeDoubleQuote = Instance.getTrans().getChar(
163 StringId.CLOSE_DOUBLE_QUOTE);
164
165 /**
166 * The name of this support class.
167 *
168 * @return the name
169 */
170 protected abstract String getSourceName();
171
172 /**
173 * Check if the given resource is supported by this {@link BasicSupport}.
174 *
175 * @param url
176 * the resource to check for
177 *
178 * @return TRUE if it is
179 */
180 protected abstract boolean supports(URL url);
181
182 /**
183 * Return TRUE if the support will return HTML encoded content values for
184 * the chapters content.
185 *
186 * @return TRUE for HTML
187 */
188 protected abstract boolean isHtml();
189
68686a37 190 protected abstract MetaData getMeta(URL source, InputStream in)
08fe2e33
NR
191 throws IOException;
192
193 /**
194 * Return the story description.
195 *
196 * @param source
197 * the source of the story
198 * @param in
199 * the input (the main resource)
200 *
201 * @return the description
202 *
203 * @throws IOException
204 * in case of I/O error
205 */
206 protected abstract String getDesc(URL source, InputStream in)
207 throws IOException;
208
08fe2e33
NR
209 /**
210 * Return the list of chapters (name and resource).
211 *
212 * @param source
213 * the source of the story
214 * @param in
215 * the input (the main resource)
216 *
217 * @return the chapters
218 *
219 * @throws IOException
220 * in case of I/O error
221 */
222 protected abstract List<Entry<String, URL>> getChapters(URL source,
223 InputStream in) throws IOException;
224
225 /**
226 * Return the content of the chapter (possibly HTML encoded, if
227 * {@link BasicSupport#isHtml()} is TRUE).
228 *
229 * @param source
230 * the source of the story
231 * @param in
232 * the input (the main resource)
233 * @param number
234 * the chapter number
235 *
236 * @return the content
237 *
238 * @throws IOException
239 * in case of I/O error
240 */
241 protected abstract String getChapterContent(URL source, InputStream in,
242 int number) throws IOException;
243
08fe2e33
NR
244 /**
245 * Return the list of cookies (values included) that must be used to
246 * correctly fetch the resources.
247 * <p>
248 * You are expected to call the super method implementation if you override
249 * it.
250 *
251 * @return the cookies
252 */
253 public Map<String, String> getCookies() {
254 return new HashMap<String, String>();
255 }
256
257 /**
258 * Process the given story resource into a partially filled {@link Story}
259 * object containing the name and metadata, except for the description.
260 *
261 * @param url
262 * the story resource
263 *
264 * @return the {@link Story}
265 *
266 * @throws IOException
267 * in case of I/O error
268 */
269 public Story processMeta(URL url) throws IOException {
270 return processMeta(url, true, false);
271 }
272
273 /**
274 * Process the given story resource into a partially filled {@link Story}
275 * object containing the name and metadata.
276 *
277 * @param url
278 * the story resource
279 *
280 * @param close
281 * close "this" and "in" when done
282 *
283 * @return the {@link Story}
284 *
285 * @throws IOException
286 * in case of I/O error
287 */
288 protected Story processMeta(URL url, boolean close, boolean getDesc)
289 throws IOException {
373da363 290 in = openInput(url);
08fe2e33
NR
291 if (in == null) {
292 return null;
293 }
294
295 try {
68686a37 296 preprocess(url, getInput());
08fe2e33
NR
297
298 Story story = new Story();
68686a37
NR
299 MetaData meta = getMeta(url, getInput());
300 story.setMeta(meta);
301
302 if (meta != null && meta.getCover() == null) {
303 meta.setCover(getDefaultCover(meta.getSubject()));
304 }
08fe2e33
NR
305
306 if (getDesc) {
307 String descChapterName = Instance.getTrans().getString(
308 StringId.DESCRIPTION);
309 story.getMeta().setResume(
310 makeChapter(url, 0, descChapterName,
311 getDesc(url, getInput())));
312 }
313
314 return story;
315 } finally {
316 if (close) {
317 try {
318 close();
319 } catch (IOException e) {
320 Instance.syserr(e);
321 }
322
323 if (in != null) {
324 in.close();
325 }
326 }
327 }
328 }
329
330 /**
331 * Process the given story resource into a fully filled {@link Story}
332 * object.
333 *
334 * @param url
335 * the story resource
92fb0719
NR
336 * @param pg
337 * the optional progress reporter
08fe2e33
NR
338 *
339 * @return the {@link Story}
340 *
341 * @throws IOException
342 * in case of I/O error
343 */
92fb0719
NR
344 public Story process(URL url, Progress pg) throws IOException {
345 if (pg == null) {
346 pg = new Progress();
347 } else {
348 pg.setMinMax(0, 100);
349 }
350
08fe2e33
NR
351 setCurrentReferer(url);
352
92fb0719 353 pg.setProgress(1);
08fe2e33
NR
354 try {
355 Story story = processMeta(url, false, true);
92fb0719 356 pg.setProgress(10);
08fe2e33 357 if (story == null) {
92fb0719 358 pg.setProgress(100);
08fe2e33
NR
359 return null;
360 }
361
362 story.setChapters(new ArrayList<Chapter>());
363
08fe2e33 364 List<Entry<String, URL>> chapters = getChapters(url, getInput());
92fb0719
NR
365 pg.setProgress(20);
366
08fe2e33
NR
367 int i = 1;
368 if (chapters != null) {
92fb0719
NR
369 Progress pgChaps = new Progress(0, chapters.size());
370 pg.addProgress(pgChaps, 80);
371
08fe2e33
NR
372 for (Entry<String, URL> chap : chapters) {
373 setCurrentReferer(chap.getValue());
374 InputStream chapIn = Instance.getCache().open(
375 chap.getValue(), this, true);
376 try {
377 story.getChapters().add(
378 makeChapter(url, i, chap.getKey(),
379 getChapterContent(url, chapIn, i)));
380 } finally {
381 chapIn.close();
382 }
a6395bef 383
3b2b638f 384 pgChaps.setProgress(i++);
08fe2e33 385 }
92fb0719
NR
386 } else {
387 pg.setProgress(100);
08fe2e33
NR
388 }
389
390 return story;
391
392 } finally {
393 try {
394 close();
395 } catch (IOException e) {
396 Instance.syserr(e);
397 }
398
399 if (in != null) {
400 in.close();
401 }
402
403 currentReferer = null;
404 }
405 }
406
407 /**
408 * The support type.$
409 *
410 * @return the type
411 */
412 public SupportType getType() {
413 return type;
414 }
415
416 /**
417 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
418 * the current {@link URL} we work on.
419 *
420 * @return the referer
421 */
422 public URL getCurrentReferer() {
423 return currentReferer;
424 }
425
426 /**
427 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
428 * the current {@link URL} we work on.
429 *
430 * @param currentReferer
431 * the new referer
432 */
433 protected void setCurrentReferer(URL currentReferer) {
434 this.currentReferer = currentReferer;
435 }
436
437 /**
438 * The support type.
439 *
440 * @param type
441 * the new type
442 *
443 * @return this
444 */
445 protected BasicSupport setType(SupportType type) {
446 this.type = type;
447 return this;
448 }
449
450 /**
68686a37 451 * Prepare the support if needed before processing.
08fe2e33
NR
452 *
453 * @param source
454 * the source of the story
455 * @param in
456 * the input (the main resource)
457 *
08fe2e33
NR
458 * @throws IOException
459 * on I/O error
460 */
68686a37 461 protected void preprocess(URL source, InputStream in) throws IOException {
08fe2e33
NR
462 }
463
464 /**
465 * Now that we have processed the {@link Story}, close the resources if any.
466 *
467 * @throws IOException
468 * on I/O error
469 */
470 protected void close() throws IOException {
471 }
472
473 /**
474 * Create a {@link Chapter} object from the given information, formatting
475 * the content as it should be.
476 *
477 * @param number
478 * the chapter number
479 * @param name
480 * the chapter name
481 * @param content
482 * the chapter content
483 *
484 * @return the {@link Chapter}
485 *
486 * @throws IOException
487 * in case of I/O error
488 */
489 protected Chapter makeChapter(URL source, int number, String name,
490 String content) throws IOException {
08fe2e33
NR
491 // Chapter name: process it correctly, then remove the possible
492 // redundant "Chapter x: " in front of it
493 String chapterName = processPara(name).getContent().trim();
494 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
495 .split(",")) {
496 String chapterWord = Instance.getConfig().getStringX(
497 Config.CHAPTER, lang);
498 if (chapterName.startsWith(chapterWord)) {
499 chapterName = chapterName.substring(chapterWord.length())
500 .trim();
501 break;
502 }
503 }
504
505 if (chapterName.startsWith(Integer.toString(number))) {
506 chapterName = chapterName.substring(
507 Integer.toString(number).length()).trim();
508 }
509
510 if (chapterName.startsWith(":")) {
511 chapterName = chapterName.substring(1).trim();
512 }
513 //
514
515 Chapter chap = new Chapter(number, chapterName);
516
68e370a4
NR
517 if (content != null) {
518 chap.setParagraphs(makeParagraphs(source, content));
08fe2e33
NR
519 }
520
68e370a4
NR
521 return chap;
522
523 }
524
525 /**
526 * Convert the given content into {@link Paragraph}s.
527 *
528 * @param source
529 * the source URL of the story
530 * @param content
531 * the textual content
532 *
533 * @return the {@link Paragraph}s
534 *
535 * @throws IOException
536 * in case of I/O error
537 */
538 protected List<Paragraph> makeParagraphs(URL source, String content)
539 throws IOException {
08fe2e33
NR
540 if (isHtml()) {
541 // Special <HR> processing:
542 content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
543 "\n* * *\n");
544 }
545
68e370a4 546 List<Paragraph> paras = new ArrayList<Paragraph>();
9252c65e 547 InputStream in = new ByteArrayInputStream(content.getBytes("UTF-8"));
08fe2e33 548 try {
68e370a4
NR
549 BufferedReader buff = new BufferedReader(new InputStreamReader(in,
550 "UTF-8"));
551
552 for (String encodedLine = buff.readLine(); encodedLine != null; encodedLine = buff
553 .readLine()) {
554 String lines[];
555 if (isHtml()) {
556 lines = encodedLine.split("(<p>|</p>|<br>|<br/>|\\n)");
557 } else {
558 lines = new String[] { encodedLine };
08fe2e33
NR
559 }
560
68e370a4
NR
561 for (String aline : lines) {
562 String line = aline.trim();
563
564 URL image = null;
565 if (line.startsWith("[") && line.endsWith("]")) {
566 image = getImageUrl(this, source,
567 line.substring(1, line.length() - 1).trim());
568 }
569
570 if (image != null) {
571 paras.add(new Paragraph(image));
572 } else {
573 paras.add(processPara(line));
574 }
08fe2e33
NR
575 }
576 }
68e370a4
NR
577 } finally {
578 in.close();
579 }
08fe2e33 580
68e370a4
NR
581 // Check quotes for "bad" format
582 List<Paragraph> newParas = new ArrayList<Paragraph>();
583 for (Paragraph para : paras) {
584 newParas.addAll(requotify(para));
585 }
586 paras = newParas;
08fe2e33 587
68e370a4
NR
588 // Remove double blanks/brks
589 fixBlanksBreaks(paras);
08fe2e33 590
68e370a4
NR
591 return paras;
592 }
08fe2e33 593
68e370a4
NR
594 /**
595 * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
596 * those {@link Paragraph}s.
597 * <p>
598 * The resulting list will not contain a starting or trailing blank/break
599 * nor 2 blanks or breaks following each other.
600 *
601 * @param paras
602 * the list of {@link Paragraph}s to fix
603 */
604 protected void fixBlanksBreaks(List<Paragraph> paras) {
605 boolean space = false;
606 boolean brk = true;
607 for (int i = 0; i < paras.size(); i++) {
608 Paragraph para = paras.get(i);
609 boolean thisSpace = para.getType() == ParagraphType.BLANK;
610 boolean thisBrk = para.getType() == ParagraphType.BREAK;
611
612 if (i > 0 && space && thisBrk) {
613 paras.remove(i - 1);
614 i--;
615 } else if ((space || brk) && (thisSpace || thisBrk)) {
616 paras.remove(i);
617 i--;
08fe2e33
NR
618 }
619
68e370a4
NR
620 space = thisSpace;
621 brk = thisBrk;
622 }
08fe2e33 623
68e370a4
NR
624 // Remove blank/brk at start
625 if (paras.size() > 0
626 && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
627 0).getType() == ParagraphType.BREAK)) {
628 paras.remove(0);
629 }
630
631 // Remove blank/brk at end
632 int last = paras.size() - 1;
633 if (paras.size() > 0
634 && (paras.get(last).getType() == ParagraphType.BLANK || paras
635 .get(last).getType() == ParagraphType.BREAK)) {
636 paras.remove(last);
08fe2e33
NR
637 }
638 }
639
68e370a4
NR
640 /**
641 * Get the default cover related to this subject (see <tt>.info</tt> files).
642 *
643 * @param subject
644 * the subject
645 *
646 * @return the cover if any, or NULL
647 */
68686a37
NR
648 static BufferedImage getDefaultCover(String subject) {
649 if (subject != null && !subject.isEmpty()
650 && Instance.getCoverDir() != null) {
651 try {
652 File fileCover = new File(Instance.getCoverDir(), subject);
333f0e7b 653 return getImage(null, fileCover.toURI().toURL(), subject);
68686a37
NR
654 } catch (MalformedURLException e) {
655 }
656 }
657
658 return null;
659 }
660
08fe2e33
NR
661 /**
662 * Return the list of supported image extensions.
663 *
664 * @return the extensions
665 */
68686a37 666 static String[] getImageExt(boolean emptyAllowed) {
08fe2e33
NR
667 if (emptyAllowed) {
668 return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
669 } else {
670 return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
671 }
672 }
673
333f0e7b
NR
674 static BufferedImage getImage(BasicSupport support, URL source, String line) {
675 URL url = getImageUrl(support, source, line);
68686a37
NR
676 if (url != null) {
677 InputStream in = null;
678 try {
679 in = Instance.getCache().open(url, getSupport(url), true);
595dfa7a 680 return IOUtils.toImage(in);
68686a37
NR
681 } catch (IOException e) {
682 } finally {
683 if (in != null) {
684 try {
685 in.close();
686 } catch (IOException e) {
687 }
688 }
689 }
690 }
691
692 return null;
693 }
694
08fe2e33
NR
695 /**
696 * Check if the given resource can be a local image or a remote image, then
697 * refresh the cache with it if it is.
698 *
699 * @param source
700 * the story source
701 * @param line
702 * the resource to check
703 *
704 * @return the image URL if found, or NULL
705 *
706 */
333f0e7b 707 static URL getImageUrl(BasicSupport support, URL source, String line) {
08fe2e33
NR
708 URL url = null;
709
68686a37
NR
710 if (line != null) {
711 // try for files
712 String path = null;
713 if (source != null) {
714 path = new File(source.getFile()).getParent();
715 try {
333f0e7b
NR
716 String basePath = new File(new File(path), line.trim())
717 .getAbsolutePath();
68686a37 718 for (String ext : getImageExt(true)) {
333f0e7b
NR
719 if (new File(basePath + ext).exists()) {
720 url = new File(basePath + ext).toURI().toURL();
68686a37 721 }
08fe2e33 722 }
68686a37
NR
723 } catch (Exception e) {
724 // Nothing to do here
08fe2e33 725 }
68686a37 726 }
08fe2e33 727
68686a37
NR
728 if (url == null) {
729 // try for URLs
730 try {
08fe2e33 731 for (String ext : getImageExt(true)) {
68686a37 732 if (Instance.getCache().check(new URL(line + ext))) {
08fe2e33 733 url = new URL(line + ext);
333f0e7b 734 break;
08fe2e33
NR
735 }
736 }
68686a37
NR
737
738 // try out of cache
739 if (url == null) {
740 for (String ext : getImageExt(true)) {
741 try {
742 url = new URL(line + ext);
333f0e7b 743 Instance.getCache().refresh(url, support, true);
68686a37
NR
744 break;
745 } catch (IOException e) {
746 // no image with this ext
747 url = null;
748 }
749 }
750 }
751 } catch (MalformedURLException e) {
752 // Not an url
08fe2e33 753 }
08fe2e33 754 }
08fe2e33 755
68686a37
NR
756 // refresh the cached file
757 if (url != null) {
758 try {
333f0e7b 759 Instance.getCache().refresh(url, support, true);
68686a37
NR
760 } catch (IOException e) {
761 // woops, broken image
762 url = null;
763 }
08fe2e33
NR
764 }
765 }
766
767 return url;
768 }
769
373da363
NR
770 /**
771 * Open the input file that will be used through the support.
772 *
773 * @param source
774 * the source {@link URL}
775 *
776 * @return the {@link InputStream}
777 *
778 * @throws IOException
779 * in case of I/O error
780 */
781 protected InputStream openInput(URL source) throws IOException {
782 return Instance.getCache().open(source, this, false);
783 }
784
68686a37
NR
785 protected InputStream reset(InputStream in) {
786 try {
787 in.reset();
788 } catch (IOException e) {
789 }
790 return in;
791 }
792
08fe2e33
NR
793 /**
794 * Reset then return {@link BasicSupport#in}.
795 *
796 * @return {@link BasicSupport#in}
08fe2e33 797 */
68686a37
NR
798 protected InputStream getInput() {
799 return reset(in);
08fe2e33
NR
800 }
801
802 /**
803 * Fix the author name if it is prefixed with some "by" {@link String}.
804 *
805 * @param author
806 * the author with a possible prefix
807 *
808 * @return the author without prefixes
809 */
68686a37 810 protected String fixAuthor(String author) {
08fe2e33
NR
811 if (author != null) {
812 for (String suffix : new String[] { " ", ":" }) {
813 for (String byString : Instance.getConfig()
814 .getString(Config.BYS).split(",")) {
815 byString += suffix;
816 if (author.toUpperCase().startsWith(byString.toUpperCase())) {
817 author = author.substring(byString.length()).trim();
818 }
819 }
820 }
821
822 // Special case (without suffix):
823 if (author.startsWith("©")) {
824 author = author.substring(1);
825 }
826 }
827
828 return author;
829 }
830
831 /**
832 * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
833 * and requotify them (i.e., separate them into QUOTE paragraphs and other
834 * paragraphs (quotes or not)).
835 *
836 * @param para
837 * the paragraph to requotify (not necessaraly a quote)
838 *
839 * @return the correctly (or so we hope) quotified paragraphs
840 */
68e370a4 841 protected List<Paragraph> requotify(Paragraph para) {
08fe2e33
NR
842 List<Paragraph> newParas = new ArrayList<Paragraph>();
843
68686a37
NR
844 if (para.getType() == ParagraphType.QUOTE
845 && para.getContent().length() > 2) {
08fe2e33
NR
846 String line = para.getContent();
847 boolean singleQ = line.startsWith("" + openQuote);
848 boolean doubleQ = line.startsWith("" + openDoubleQuote);
849
b4dc6ab5
NR
850 // Do not try when more than one quote at a time
851 // (some stories are not easily readable if we do)
852 if (singleQ
853 && line.indexOf(closeQuote, 1) < line
854 .lastIndexOf(closeQuote)) {
855 newParas.add(para);
856 return newParas;
857 }
858 if (doubleQ
859 && line.indexOf(closeDoubleQuote, 1) < line
860 .lastIndexOf(closeDoubleQuote)) {
861 newParas.add(para);
862 return newParas;
863 }
864 //
865
08fe2e33
NR
866 if (!singleQ && !doubleQ) {
867 line = openDoubleQuote + line + closeDoubleQuote;
868 newParas.add(new Paragraph(ParagraphType.QUOTE, line));
869 } else {
a6395bef 870 char open = singleQ ? openQuote : openDoubleQuote;
08fe2e33 871 char close = singleQ ? closeQuote : closeDoubleQuote;
a6395bef
NR
872
873 int posDot = -1;
874 boolean inQuote = false;
875 int i = 0;
876 for (char car : line.toCharArray()) {
877 if (car == open) {
878 inQuote = true;
879 } else if (car == close) {
880 inQuote = false;
881 } else if (car == '.' && !inQuote) {
882 posDot = i;
883 break;
884 }
885 i++;
08fe2e33
NR
886 }
887
888 if (posDot >= 0) {
889 String rest = line.substring(posDot + 1).trim();
890 line = line.substring(0, posDot + 1).trim();
891 newParas.add(new Paragraph(ParagraphType.QUOTE, line));
68686a37
NR
892 if (!rest.isEmpty()) {
893 newParas.addAll(requotify(processPara(rest)));
894 }
08fe2e33
NR
895 } else {
896 newParas.add(para);
897 }
898 }
899 } else {
900 newParas.add(para);
901 }
902
903 return newParas;
904 }
905
906 /**
907 * Process a {@link Paragraph} from a raw line of text.
908 * <p>
909 * Will also fix quotes and HTML encoding if needed.
910 *
911 * @param line
912 * the raw line
913 *
914 * @return the processed {@link Paragraph}
915 */
22848428 916 protected Paragraph processPara(String line) {
08fe2e33
NR
917 line = ifUnhtml(line).trim();
918
919 boolean space = true;
920 boolean brk = true;
921 boolean quote = false;
922 boolean tentativeCloseQuote = false;
923 char prev = '\0';
924 int dashCount = 0;
925
926 StringBuilder builder = new StringBuilder();
927 for (char car : line.toCharArray()) {
928 if (car != '-') {
929 if (dashCount > 0) {
930 // dash, ndash and mdash: - – —
931 // currently: always use mdash
932 builder.append(dashCount == 1 ? '-' : '—');
933 }
934 dashCount = 0;
935 }
936
937 if (tentativeCloseQuote) {
938 tentativeCloseQuote = false;
22848428 939 if (Character.isLetterOrDigit(car)) {
08fe2e33
NR
940 builder.append("'");
941 } else {
22848428
NR
942 // handle double-single quotes as double quotes
943 if (prev == car) {
944 builder.append(closeDoubleQuote);
945 continue;
946 } else {
947 builder.append(closeQuote);
948 }
08fe2e33
NR
949 }
950 }
951
952 switch (car) {
953 case ' ': // note: unbreakable space
954 case ' ':
955 case '\t':
956 case '\n': // just in case
957 case '\r': // just in case
958 builder.append(' ');
959 break;
960
961 case '\'':
962 if (space || (brk && quote)) {
963 quote = true;
22848428
NR
964 // handle double-single quotes as double quotes
965 if (prev == car) {
966 builder.deleteCharAt(builder.length() - 1);
967 builder.append(openDoubleQuote);
968 } else {
969 builder.append(openQuote);
970 }
971 } else if (prev == ' ' || prev == car) {
972 // handle double-single quotes as double quotes
973 if (prev == car) {
974 builder.deleteCharAt(builder.length() - 1);
975 builder.append(openDoubleQuote);
976 } else {
977 builder.append(openQuote);
978 }
08fe2e33
NR
979 } else {
980 // it is a quote ("I'm off") or a 'quote' ("This
981 // 'good' restaurant"...)
982 tentativeCloseQuote = true;
983 }
984 break;
985
986 case '"':
987 if (space || (brk && quote)) {
988 quote = true;
989 builder.append(openDoubleQuote);
990 } else if (prev == ' ') {
991 builder.append(openDoubleQuote);
992 } else {
993 builder.append(closeDoubleQuote);
994 }
995 break;
996
997 case '-':
998 if (space) {
999 quote = true;
1000 } else {
1001 dashCount++;
1002 }
1003 space = false;
1004 break;
1005
1006 case '*':
1007 case '~':
1008 case '/':
1009 case '\\':
1010 case '<':
1011 case '>':
1012 case '=':
1013 case '+':
1014 case '_':
1015 case '–':
1016 case '—':
1017 space = false;
1018 builder.append(car);
1019 break;
1020
1021 case '‘':
1022 case '`':
1023 case '‹':
1024 case '﹁':
1025 case '〈':
1026 case '「':
1027 if (space || (brk && quote)) {
1028 quote = true;
1029 builder.append(openQuote);
1030 } else {
22848428
NR
1031 // handle double-single quotes as double quotes
1032 if (prev == car) {
1033 builder.deleteCharAt(builder.length() - 1);
1034 builder.append(openDoubleQuote);
1035 } else {
1036 builder.append(openQuote);
1037 }
08fe2e33
NR
1038 }
1039 space = false;
1040 brk = false;
1041 break;
1042
1043 case '’':
1044 case '›':
1045 case '﹂':
1046 case '〉':
1047 case '」':
1048 space = false;
1049 brk = false;
22848428
NR
1050 // handle double-single quotes as double quotes
1051 if (prev == car) {
1052 builder.deleteCharAt(builder.length() - 1);
1053 builder.append(closeDoubleQuote);
1054 } else {
1055 builder.append(closeQuote);
1056 }
08fe2e33
NR
1057 break;
1058
1059 case '«':
1060 case '“':
1061 case '﹃':
1062 case '《':
1063 case '『':
1064 if (space || (brk && quote)) {
1065 quote = true;
1066 builder.append(openDoubleQuote);
1067 } else {
1068 builder.append(openDoubleQuote);
1069 }
1070 space = false;
1071 brk = false;
1072 break;
1073
1074 case '»':
1075 case '”':
1076 case '﹄':
1077 case '》':
1078 case '』':
1079 space = false;
1080 brk = false;
1081 builder.append(closeDoubleQuote);
1082 break;
1083
1084 default:
1085 space = false;
1086 brk = false;
1087 builder.append(car);
1088 break;
1089 }
1090
1091 prev = car;
1092 }
1093
1094 if (tentativeCloseQuote) {
1095 tentativeCloseQuote = false;
1096 builder.append(closeQuote);
1097 }
1098
1099 line = builder.toString().trim();
1100
1101 ParagraphType type = ParagraphType.NORMAL;
1102 if (space) {
1103 type = ParagraphType.BLANK;
1104 } else if (brk) {
1105 type = ParagraphType.BREAK;
1106 } else if (quote) {
1107 type = ParagraphType.QUOTE;
1108 }
1109
1110 return new Paragraph(type, line);
1111 }
1112
1113 /**
1114 * Remove the HTML from the inpit <b>if</b> {@link BasicSupport#isHtml()} is
1115 * true.
1116 *
1117 * @param input
1118 * the input
1119 *
1120 * @return the no html version if needed
1121 */
1122 private String ifUnhtml(String input) {
1123 if (isHtml() && input != null) {
1124 return StringUtils.unhtml(input);
1125 }
1126
1127 return input;
1128 }
1129
1130 /**
1131 * Return a {@link BasicSupport} implementation supporting the given
1132 * resource if possible.
1133 *
1134 * @param url
1135 * the story resource
1136 *
1137 * @return an implementation that supports it, or NULL
1138 */
1139 public static BasicSupport getSupport(URL url) {
1140 if (url == null) {
1141 return null;
1142 }
1143
1144 // TEXT and INFO_TEXT always support files (not URLs though)
1145 for (SupportType type : SupportType.values()) {
1146 if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1147 BasicSupport support = getSupport(type);
1148 if (support != null && support.supports(url)) {
1149 return support;
1150 }
1151 }
1152 }
1153
373da363
NR
1154 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
1155 SupportType.TEXT }) {
08fe2e33
NR
1156 BasicSupport support = getSupport(type);
1157 if (support != null && support.supports(url)) {
1158 return support;
1159 }
1160 }
1161
1162 return null;
1163 }
1164
1165 /**
1166 * Return a {@link BasicSupport} implementation supporting the given type.
1167 *
1168 * @param type
1169 * the type
1170 *
1171 * @return an implementation that supports it, or NULL
1172 */
1173 public static BasicSupport getSupport(SupportType type) {
1174 switch (type) {
1175 case EPUB:
1176 return new Epub().setType(type);
1177 case INFO_TEXT:
1178 return new InfoText().setType(type);
1179 case FIMFICTION:
1180 return new Fimfiction().setType(type);
1181 case FANFICTION:
1182 return new Fanfiction().setType(type);
1183 case TEXT:
1184 return new Text().setType(type);
1185 case MANGAFOX:
1186 return new MangaFox().setType(type);
1187 case E621:
1188 return new E621().setType(type);
1189 case CBZ:
1190 return new Cbz().setType(type);
373da363
NR
1191 case HTML:
1192 return new Html().setType(type);
08fe2e33
NR
1193 }
1194
1195 return null;
1196 }
68686a37
NR
1197
1198 /**
1199 * Return the first line from the given input which correspond to the given
1200 * selectors.
1201 *
1202 * @param in
1203 * the input
1204 * @param needle
1205 * a string that must be found inside the target line (also
1206 * supports "^" at start to say "only if it starts with" the
1207 * needle)
1208 * @param relativeLine
1209 * the line to return based upon the target line position (-1 =
1210 * the line before, 0 = the target line...)
1211 *
1212 * @return the line
1213 */
1214 static String getLine(InputStream in, String needle, int relativeLine) {
1215 return getLine(in, needle, relativeLine, true);
1216 }
1217
1218 /**
1219 * Return a line from the given input which correspond to the given
1220 * selectors.
1221 *
1222 * @param in
1223 * the input
1224 * @param needle
1225 * a string that must be found inside the target line (also
1226 * supports "^" at start to say "only if it starts with" the
1227 * needle)
1228 * @param relativeLine
1229 * the line to return based upon the target line position (-1 =
1230 * the line before, 0 = the target line...)
1231 * @param first
1232 * takes the first result (as opposed to the last one, which will
1233 * also always spend the input)
1234 *
1235 * @return the line
1236 */
1237 static String getLine(InputStream in, String needle, int relativeLine,
1238 boolean first) {
1239 String rep = null;
1240
1241 try {
1242 in.reset();
1243 } catch (IOException e) {
1244 Instance.syserr(e);
1245 }
1246
1247 List<String> lines = new ArrayList<String>();
1248 @SuppressWarnings("resource")
1249 Scanner scan = new Scanner(in, "UTF-8");
1250 int index = -1;
1251 scan.useDelimiter("\\n");
1252 while (scan.hasNext()) {
1253 lines.add(scan.next());
1254
1255 if (index == -1) {
1256 if (needle.startsWith("^")) {
1257 if (lines.get(lines.size() - 1).startsWith(
1258 needle.substring(1))) {
1259 index = lines.size() - 1;
1260 }
1261
1262 } else {
1263 if (lines.get(lines.size() - 1).contains(needle)) {
1264 index = lines.size() - 1;
1265 }
1266 }
1267 }
1268
1269 if (index >= 0 && index + relativeLine < lines.size()) {
1270 rep = lines.get(index + relativeLine);
1271 if (first) {
1272 break;
1273 }
1274 }
1275 }
1276
1277 return rep;
1278 }
08fe2e33 1279}