Move default tmp dirs, fix BLANK handling
[fanfix.git] / src / be / nikiroo / fanfix / supported / BasicSupport.java
CommitLineData
08fe2e33
NR
1package be.nikiroo.fanfix.supported;
2
68686a37 3import java.awt.image.BufferedImage;
68e370a4 4import java.io.BufferedReader;
08fe2e33
NR
5import java.io.ByteArrayInputStream;
6import java.io.File;
7import java.io.IOException;
8import java.io.InputStream;
68e370a4 9import java.io.InputStreamReader;
08fe2e33
NR
10import java.net.MalformedURLException;
11import java.net.URL;
08fe2e33
NR
12import java.util.ArrayList;
13import java.util.HashMap;
14import java.util.List;
15import java.util.Map;
16import java.util.Map.Entry;
17import java.util.Scanner;
18
19import be.nikiroo.fanfix.Instance;
20import be.nikiroo.fanfix.bundles.Config;
21import be.nikiroo.fanfix.bundles.StringId;
22import be.nikiroo.fanfix.data.Chapter;
23import be.nikiroo.fanfix.data.MetaData;
24import be.nikiroo.fanfix.data.Paragraph;
08fe2e33 25import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
9252c65e 26import be.nikiroo.fanfix.data.Story;
595dfa7a 27import be.nikiroo.utils.IOUtils;
3b2b638f 28import be.nikiroo.utils.Progress;
08fe2e33
NR
29import be.nikiroo.utils.StringUtils;
30
31/**
32 * This class is the base class used by the other support classes. It can be
33 * used outside of this package, and has static methods that you can use to get
34 * access to the correct support class.
35 * <p>
36 * It will be used with 'resources' (usually web pages or files).
37 *
38 * @author niki
39 */
40public abstract class BasicSupport {
41 /**
42 * The supported input types for which we can get a {@link BasicSupport}
43 * object.
44 *
45 * @author niki
46 */
47 public enum SupportType {
48 /** EPUB files created with this program */
49 EPUB,
50 /** Pure text file with some rules */
51 TEXT,
52 /** TEXT but with associated .info file */
53 INFO_TEXT,
54 /** My Little Pony fanfictions */
55 FIMFICTION,
56 /** Fanfictions from a lot of different universes */
57 FANFICTION,
58 /** Website with lots of Mangas */
59 MANGAFOX,
60 /** Furry website with comics support */
61 E621,
62 /** CBZ files */
63 CBZ;
64
65 /**
66 * A description of this support type (more information than the
67 * {@link BasicSupport#getSourceName()}).
68 *
69 * @return the description
70 */
71 public String getDesc() {
72 String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
73 this.name());
74
75 if (desc == null) {
76 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
77 }
78
79 return desc;
80 }
81
82 /**
83 * The name of this support type (a short version).
84 *
85 * @return the name
86 */
87 public String getSourceName() {
88 BasicSupport support = BasicSupport.getSupport(this);
89 if (support != null) {
90 return support.getSourceName();
91 }
92
93 return null;
94 }
95
96 @Override
97 public String toString() {
98 return super.toString().toLowerCase();
99 }
100
101 /**
102 * Call {@link SupportType#valueOf(String.toUpperCase())}.
103 *
104 * @param typeName
105 * the possible type name
106 *
107 * @return NULL or the type
108 */
109 public static SupportType valueOfUC(String typeName) {
110 return SupportType.valueOf(typeName == null ? null : typeName
111 .toUpperCase());
112 }
113
114 /**
115 * Call {@link SupportType#valueOf(String.toUpperCase())} but return
116 * NULL for NULL instead of raising exception.
117 *
118 * @param typeName
119 * the possible type name
120 *
121 * @return NULL or the type
122 */
123 public static SupportType valueOfNullOkUC(String typeName) {
124 if (typeName == null) {
125 return null;
126 }
127
128 return SupportType.valueOfUC(typeName);
129 }
130
131 /**
132 * Call {@link SupportType#valueOf(String.toUpperCase())} but return
133 * NULL in case of error instead of raising an exception.
134 *
135 * @param typeName
136 * the possible type name
137 *
138 * @return NULL or the type
139 */
140 public static SupportType valueOfAllOkUC(String typeName) {
141 try {
142 return SupportType.valueOfUC(typeName);
143 } catch (Exception e) {
144 return null;
145 }
146 }
147 }
148
08fe2e33
NR
149 private InputStream in;
150 private SupportType type;
151 private URL currentReferer; // with only one 'r', as in 'HTTP'...
152
153 // quote chars
154 private char openQuote = Instance.getTrans().getChar(
155 StringId.OPEN_SINGLE_QUOTE);
156 private char closeQuote = Instance.getTrans().getChar(
157 StringId.CLOSE_SINGLE_QUOTE);
158 private char openDoubleQuote = Instance.getTrans().getChar(
159 StringId.OPEN_DOUBLE_QUOTE);
160 private char closeDoubleQuote = Instance.getTrans().getChar(
161 StringId.CLOSE_DOUBLE_QUOTE);
162
/**
 * The name of this support class.
 *
 * @return the name
 */
protected abstract String getSourceName();

/**
 * Check if the given resource is supported by this {@link BasicSupport}.
 *
 * @param url
 *            the resource to check for
 *
 * @return TRUE if it is
 */
protected abstract boolean supports(URL url);

/**
 * Return TRUE if the support will return HTML encoded content values for
 * the chapters content.
 *
 * @return TRUE for HTML
 */
protected abstract boolean isHtml();

/**
 * Return the metadata of the story.
 * <p>
 * The result is stored as is in the {@link Story} built by
 * {@link BasicSupport#processMeta(URL, boolean, boolean)}.
 *
 * @param source
 *            the source of the story
 * @param in
 *            the input (the main resource)
 *
 * @return the {@link MetaData}
 *
 * @throws IOException
 *             in case of I/O error
 */
protected abstract MetaData getMeta(URL source, InputStream in)
        throws IOException;

/**
 * Return the story description.
 *
 * @param source
 *            the source of the story
 * @param in
 *            the input (the main resource)
 *
 * @return the description
 *
 * @throws IOException
 *             in case of I/O error
 */
protected abstract String getDesc(URL source, InputStream in)
        throws IOException;

/**
 * Return the list of chapters (name and resource).
 *
 * @param source
 *            the source of the story
 * @param in
 *            the input (the main resource)
 *
 * @return the chapters
 *
 * @throws IOException
 *             in case of I/O error
 */
protected abstract List<Entry<String, URL>> getChapters(URL source,
        InputStream in) throws IOException;

/**
 * Return the content of the chapter (possibly HTML encoded, if
 * {@link BasicSupport#isHtml()} is TRUE).
 *
 * @param source
 *            the source of the story
 * @param in
 *            the input (the chapter resource, as opened by the caller)
 * @param number
 *            the chapter number
 *
 * @return the content
 *
 * @throws IOException
 *             in case of I/O error
 */
protected abstract String getChapterContent(URL source, InputStream in,
        int number) throws IOException;
241
08fe2e33
NR
242 /**
243 * Return the list of cookies (values included) that must be used to
244 * correctly fetch the resources.
245 * <p>
246 * You are expected to call the super method implementation if you override
247 * it.
248 *
249 * @return the cookies
250 */
251 public Map<String, String> getCookies() {
252 return new HashMap<String, String>();
253 }
254
255 /**
256 * Process the given story resource into a partially filled {@link Story}
257 * object containing the name and metadata, except for the description.
258 *
259 * @param url
260 * the story resource
261 *
262 * @return the {@link Story}
263 *
264 * @throws IOException
265 * in case of I/O error
266 */
267 public Story processMeta(URL url) throws IOException {
268 return processMeta(url, true, false);
269 }
270
271 /**
272 * Process the given story resource into a partially filled {@link Story}
273 * object containing the name and metadata.
274 *
275 * @param url
276 * the story resource
277 *
278 * @param close
279 * close "this" and "in" when done
280 *
281 * @return the {@link Story}
282 *
283 * @throws IOException
284 * in case of I/O error
285 */
286 protected Story processMeta(URL url, boolean close, boolean getDesc)
287 throws IOException {
288 in = Instance.getCache().open(url, this, false);
289 if (in == null) {
290 return null;
291 }
292
293 try {
68686a37 294 preprocess(url, getInput());
08fe2e33
NR
295
296 Story story = new Story();
68686a37
NR
297 MetaData meta = getMeta(url, getInput());
298 story.setMeta(meta);
299
300 if (meta != null && meta.getCover() == null) {
301 meta.setCover(getDefaultCover(meta.getSubject()));
302 }
08fe2e33
NR
303
304 if (getDesc) {
305 String descChapterName = Instance.getTrans().getString(
306 StringId.DESCRIPTION);
307 story.getMeta().setResume(
308 makeChapter(url, 0, descChapterName,
309 getDesc(url, getInput())));
310 }
311
312 return story;
313 } finally {
314 if (close) {
315 try {
316 close();
317 } catch (IOException e) {
318 Instance.syserr(e);
319 }
320
321 if (in != null) {
322 in.close();
323 }
324 }
325 }
326 }
327
328 /**
329 * Process the given story resource into a fully filled {@link Story}
330 * object.
331 *
332 * @param url
333 * the story resource
92fb0719
NR
334 * @param pg
335 * the optional progress reporter
08fe2e33
NR
336 *
337 * @return the {@link Story}
338 *
339 * @throws IOException
340 * in case of I/O error
341 */
92fb0719
NR
342 public Story process(URL url, Progress pg) throws IOException {
343 if (pg == null) {
344 pg = new Progress();
345 } else {
346 pg.setMinMax(0, 100);
347 }
348
08fe2e33
NR
349 setCurrentReferer(url);
350
92fb0719 351 pg.setProgress(1);
08fe2e33
NR
352 try {
353 Story story = processMeta(url, false, true);
92fb0719 354 pg.setProgress(10);
08fe2e33 355 if (story == null) {
92fb0719 356 pg.setProgress(100);
08fe2e33
NR
357 return null;
358 }
359
360 story.setChapters(new ArrayList<Chapter>());
361
08fe2e33 362 List<Entry<String, URL>> chapters = getChapters(url, getInput());
92fb0719
NR
363 pg.setProgress(20);
364
08fe2e33
NR
365 int i = 1;
366 if (chapters != null) {
92fb0719
NR
367 Progress pgChaps = new Progress(0, chapters.size());
368 pg.addProgress(pgChaps, 80);
369
08fe2e33
NR
370 for (Entry<String, URL> chap : chapters) {
371 setCurrentReferer(chap.getValue());
372 InputStream chapIn = Instance.getCache().open(
373 chap.getValue(), this, true);
374 try {
375 story.getChapters().add(
376 makeChapter(url, i, chap.getKey(),
377 getChapterContent(url, chapIn, i)));
378 } finally {
379 chapIn.close();
380 }
a6395bef 381
3b2b638f 382 pgChaps.setProgress(i++);
08fe2e33 383 }
92fb0719
NR
384 } else {
385 pg.setProgress(100);
08fe2e33
NR
386 }
387
388 return story;
389
390 } finally {
391 try {
392 close();
393 } catch (IOException e) {
394 Instance.syserr(e);
395 }
396
397 if (in != null) {
398 in.close();
399 }
400
401 currentReferer = null;
402 }
403 }
404
/**
 * The support type, as set by {@link BasicSupport#setType(SupportType)}.
 *
 * @return the type
 */
public SupportType getType() {
    return type;
}
413
/**
 * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
 * the current {@link URL} we work on.
 *
 * @return the referer
 */
public URL getCurrentReferer() {
    return currentReferer;
}
423
/**
 * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
 * the current {@link URL} we work on.
 *
 * @param currentReferer
 *            the new referer
 */
protected void setCurrentReferer(URL currentReferer) {
    this.currentReferer = currentReferer;
}
434
/**
 * The support type.
 *
 * @param type
 *            the new type
 *
 * @return this (so the call can be chained after a constructor)
 */
protected BasicSupport setType(SupportType type) {
    this.type = type;
    return this;
}
447
/**
 * Prepare the support if needed before processing.
 * <p>
 * Called by {@link BasicSupport#processMeta(URL, boolean, boolean)} before
 * the metadata is read; this base implementation does nothing.
 *
 * @param source
 *            the source of the story
 * @param in
 *            the input (the main resource)
 *
 * @throws IOException
 *             on I/O error
 */
protected void preprocess(URL source, InputStream in) throws IOException {
}
461
/**
 * Now that we have processed the {@link Story}, close the resources if any.
 * <p>
 * This base implementation has nothing to release and does nothing.
 *
 * @throws IOException
 *             on I/O error
 */
protected void close() throws IOException {
}
470
471 /**
472 * Create a {@link Chapter} object from the given information, formatting
473 * the content as it should be.
474 *
475 * @param number
476 * the chapter number
477 * @param name
478 * the chapter name
479 * @param content
480 * the chapter content
481 *
482 * @return the {@link Chapter}
483 *
484 * @throws IOException
485 * in case of I/O error
486 */
487 protected Chapter makeChapter(URL source, int number, String name,
488 String content) throws IOException {
08fe2e33
NR
489 // Chapter name: process it correctly, then remove the possible
490 // redundant "Chapter x: " in front of it
491 String chapterName = processPara(name).getContent().trim();
492 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
493 .split(",")) {
494 String chapterWord = Instance.getConfig().getStringX(
495 Config.CHAPTER, lang);
496 if (chapterName.startsWith(chapterWord)) {
497 chapterName = chapterName.substring(chapterWord.length())
498 .trim();
499 break;
500 }
501 }
502
503 if (chapterName.startsWith(Integer.toString(number))) {
504 chapterName = chapterName.substring(
505 Integer.toString(number).length()).trim();
506 }
507
508 if (chapterName.startsWith(":")) {
509 chapterName = chapterName.substring(1).trim();
510 }
511 //
512
513 Chapter chap = new Chapter(number, chapterName);
514
68e370a4
NR
515 if (content != null) {
516 chap.setParagraphs(makeParagraphs(source, content));
08fe2e33
NR
517 }
518
68e370a4
NR
519 return chap;
520
521 }
522
523 /**
524 * Convert the given content into {@link Paragraph}s.
525 *
526 * @param source
527 * the source URL of the story
528 * @param content
529 * the textual content
530 *
531 * @return the {@link Paragraph}s
532 *
533 * @throws IOException
534 * in case of I/O error
535 */
536 protected List<Paragraph> makeParagraphs(URL source, String content)
537 throws IOException {
08fe2e33
NR
538 if (isHtml()) {
539 // Special <HR> processing:
540 content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
541 "\n* * *\n");
542 }
543
68e370a4 544 List<Paragraph> paras = new ArrayList<Paragraph>();
9252c65e 545 InputStream in = new ByteArrayInputStream(content.getBytes("UTF-8"));
08fe2e33 546 try {
68e370a4
NR
547 BufferedReader buff = new BufferedReader(new InputStreamReader(in,
548 "UTF-8"));
549
550 for (String encodedLine = buff.readLine(); encodedLine != null; encodedLine = buff
551 .readLine()) {
552 String lines[];
553 if (isHtml()) {
554 lines = encodedLine.split("(<p>|</p>|<br>|<br/>|\\n)");
555 } else {
556 lines = new String[] { encodedLine };
08fe2e33
NR
557 }
558
68e370a4
NR
559 for (String aline : lines) {
560 String line = aline.trim();
561
562 URL image = null;
563 if (line.startsWith("[") && line.endsWith("]")) {
564 image = getImageUrl(this, source,
565 line.substring(1, line.length() - 1).trim());
566 }
567
568 if (image != null) {
569 paras.add(new Paragraph(image));
570 } else {
571 paras.add(processPara(line));
572 }
08fe2e33
NR
573 }
574 }
68e370a4
NR
575 } finally {
576 in.close();
577 }
08fe2e33 578
68e370a4
NR
579 // Check quotes for "bad" format
580 List<Paragraph> newParas = new ArrayList<Paragraph>();
581 for (Paragraph para : paras) {
582 newParas.addAll(requotify(para));
583 }
584 paras = newParas;
08fe2e33 585
68e370a4
NR
586 // Remove double blanks/brks
587 fixBlanksBreaks(paras);
08fe2e33 588
68e370a4
NR
589 return paras;
590 }
08fe2e33 591
68e370a4
NR
592 /**
593 * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
594 * those {@link Paragraph}s.
595 * <p>
596 * The resulting list will not contain a starting or trailing blank/break
597 * nor 2 blanks or breaks following each other.
598 *
599 * @param paras
600 * the list of {@link Paragraph}s to fix
601 */
602 protected void fixBlanksBreaks(List<Paragraph> paras) {
603 boolean space = false;
604 boolean brk = true;
605 for (int i = 0; i < paras.size(); i++) {
606 Paragraph para = paras.get(i);
607 boolean thisSpace = para.getType() == ParagraphType.BLANK;
608 boolean thisBrk = para.getType() == ParagraphType.BREAK;
609
610 if (i > 0 && space && thisBrk) {
611 paras.remove(i - 1);
612 i--;
613 } else if ((space || brk) && (thisSpace || thisBrk)) {
614 paras.remove(i);
615 i--;
08fe2e33
NR
616 }
617
68e370a4
NR
618 space = thisSpace;
619 brk = thisBrk;
620 }
08fe2e33 621
68e370a4
NR
622 // Remove blank/brk at start
623 if (paras.size() > 0
624 && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
625 0).getType() == ParagraphType.BREAK)) {
626 paras.remove(0);
627 }
628
629 // Remove blank/brk at end
630 int last = paras.size() - 1;
631 if (paras.size() > 0
632 && (paras.get(last).getType() == ParagraphType.BLANK || paras
633 .get(last).getType() == ParagraphType.BREAK)) {
634 paras.remove(last);
08fe2e33
NR
635 }
636 }
637
68e370a4
NR
638 /**
639 * Get the default cover related to this subject (see <tt>.info</tt> files).
640 *
641 * @param subject
642 * the subject
643 *
644 * @return the cover if any, or NULL
645 */
68686a37
NR
646 static BufferedImage getDefaultCover(String subject) {
647 if (subject != null && !subject.isEmpty()
648 && Instance.getCoverDir() != null) {
649 try {
650 File fileCover = new File(Instance.getCoverDir(), subject);
333f0e7b 651 return getImage(null, fileCover.toURI().toURL(), subject);
68686a37
NR
652 } catch (MalformedURLException e) {
653 }
654 }
655
656 return null;
657 }
658
08fe2e33
NR
659 /**
660 * Return the list of supported image extensions.
661 *
662 * @return the extensions
663 */
68686a37 664 static String[] getImageExt(boolean emptyAllowed) {
08fe2e33
NR
665 if (emptyAllowed) {
666 return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
667 } else {
668 return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
669 }
670 }
671
333f0e7b
NR
672 static BufferedImage getImage(BasicSupport support, URL source, String line) {
673 URL url = getImageUrl(support, source, line);
68686a37
NR
674 if (url != null) {
675 InputStream in = null;
676 try {
677 in = Instance.getCache().open(url, getSupport(url), true);
595dfa7a 678 return IOUtils.toImage(in);
68686a37
NR
679 } catch (IOException e) {
680 } finally {
681 if (in != null) {
682 try {
683 in.close();
684 } catch (IOException e) {
685 }
686 }
687 }
688 }
689
690 return null;
691 }
692
08fe2e33
NR
693 /**
694 * Check if the given resource can be a local image or a remote image, then
695 * refresh the cache with it if it is.
696 *
697 * @param source
698 * the story source
699 * @param line
700 * the resource to check
701 *
702 * @return the image URL if found, or NULL
703 *
704 */
333f0e7b 705 static URL getImageUrl(BasicSupport support, URL source, String line) {
08fe2e33
NR
706 URL url = null;
707
68686a37
NR
708 if (line != null) {
709 // try for files
710 String path = null;
711 if (source != null) {
712 path = new File(source.getFile()).getParent();
713 try {
333f0e7b
NR
714 String basePath = new File(new File(path), line.trim())
715 .getAbsolutePath();
68686a37 716 for (String ext : getImageExt(true)) {
333f0e7b
NR
717 if (new File(basePath + ext).exists()) {
718 url = new File(basePath + ext).toURI().toURL();
68686a37 719 }
08fe2e33 720 }
68686a37
NR
721 } catch (Exception e) {
722 // Nothing to do here
08fe2e33 723 }
68686a37 724 }
08fe2e33 725
68686a37
NR
726 if (url == null) {
727 // try for URLs
728 try {
08fe2e33 729 for (String ext : getImageExt(true)) {
68686a37 730 if (Instance.getCache().check(new URL(line + ext))) {
08fe2e33 731 url = new URL(line + ext);
333f0e7b 732 break;
08fe2e33
NR
733 }
734 }
68686a37
NR
735
736 // try out of cache
737 if (url == null) {
738 for (String ext : getImageExt(true)) {
739 try {
740 url = new URL(line + ext);
333f0e7b 741 Instance.getCache().refresh(url, support, true);
68686a37
NR
742 break;
743 } catch (IOException e) {
744 // no image with this ext
745 url = null;
746 }
747 }
748 }
749 } catch (MalformedURLException e) {
750 // Not an url
08fe2e33 751 }
08fe2e33 752 }
08fe2e33 753
68686a37
NR
754 // refresh the cached file
755 if (url != null) {
756 try {
333f0e7b 757 Instance.getCache().refresh(url, support, true);
68686a37
NR
758 } catch (IOException e) {
759 // woops, broken image
760 url = null;
761 }
08fe2e33
NR
762 }
763 }
764
765 return url;
766 }
767
68686a37
NR
/**
 * Reset the given stream, then return it.
 * <p>
 * A failure to reset is ignored: the stream is returned in whatever state
 * it is.
 *
 * @param in
 *            the stream to reset
 *
 * @return the same stream
 */
protected InputStream reset(InputStream in) {
    try {
        in.reset();
    } catch (IOException e) {
        // deliberately ignored, the stream is returned as is
    }
    return in;
}
775
08fe2e33
NR
/**
 * Reset then return {@link BasicSupport#in} (the main resource currently
 * being processed).
 *
 * @return {@link BasicSupport#in}
 */
protected InputStream getInput() {
    return reset(in);
}
784
785 /**
786 * Fix the author name if it is prefixed with some "by" {@link String}.
787 *
788 * @param author
789 * the author with a possible prefix
790 *
791 * @return the author without prefixes
792 */
68686a37 793 protected String fixAuthor(String author) {
08fe2e33
NR
794 if (author != null) {
795 for (String suffix : new String[] { " ", ":" }) {
796 for (String byString : Instance.getConfig()
797 .getString(Config.BYS).split(",")) {
798 byString += suffix;
799 if (author.toUpperCase().startsWith(byString.toUpperCase())) {
800 author = author.substring(byString.length()).trim();
801 }
802 }
803 }
804
805 // Special case (without suffix):
806 if (author.startsWith("©")) {
807 author = author.substring(1);
808 }
809 }
810
811 return author;
812 }
813
814 /**
815 * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
816 * and requotify them (i.e., separate them into QUOTE paragraphs and other
817 * paragraphs (quotes or not)).
818 *
819 * @param para
820 * the paragraph to requotify (not necessaraly a quote)
821 *
822 * @return the correctly (or so we hope) quotified paragraphs
823 */
68e370a4 824 protected List<Paragraph> requotify(Paragraph para) {
08fe2e33
NR
825 List<Paragraph> newParas = new ArrayList<Paragraph>();
826
68686a37
NR
827 if (para.getType() == ParagraphType.QUOTE
828 && para.getContent().length() > 2) {
08fe2e33
NR
829 String line = para.getContent();
830 boolean singleQ = line.startsWith("" + openQuote);
831 boolean doubleQ = line.startsWith("" + openDoubleQuote);
832
b4dc6ab5
NR
833 // Do not try when more than one quote at a time
834 // (some stories are not easily readable if we do)
835 if (singleQ
836 && line.indexOf(closeQuote, 1) < line
837 .lastIndexOf(closeQuote)) {
838 newParas.add(para);
839 return newParas;
840 }
841 if (doubleQ
842 && line.indexOf(closeDoubleQuote, 1) < line
843 .lastIndexOf(closeDoubleQuote)) {
844 newParas.add(para);
845 return newParas;
846 }
847 //
848
08fe2e33
NR
849 if (!singleQ && !doubleQ) {
850 line = openDoubleQuote + line + closeDoubleQuote;
851 newParas.add(new Paragraph(ParagraphType.QUOTE, line));
852 } else {
a6395bef 853 char open = singleQ ? openQuote : openDoubleQuote;
08fe2e33 854 char close = singleQ ? closeQuote : closeDoubleQuote;
a6395bef
NR
855
856 int posDot = -1;
857 boolean inQuote = false;
858 int i = 0;
859 for (char car : line.toCharArray()) {
860 if (car == open) {
861 inQuote = true;
862 } else if (car == close) {
863 inQuote = false;
864 } else if (car == '.' && !inQuote) {
865 posDot = i;
866 break;
867 }
868 i++;
08fe2e33
NR
869 }
870
871 if (posDot >= 0) {
872 String rest = line.substring(posDot + 1).trim();
873 line = line.substring(0, posDot + 1).trim();
874 newParas.add(new Paragraph(ParagraphType.QUOTE, line));
68686a37
NR
875 if (!rest.isEmpty()) {
876 newParas.addAll(requotify(processPara(rest)));
877 }
08fe2e33
NR
878 } else {
879 newParas.add(para);
880 }
881 }
882 } else {
883 newParas.add(para);
884 }
885
886 return newParas;
887 }
888
889 /**
890 * Process a {@link Paragraph} from a raw line of text.
891 * <p>
892 * Will also fix quotes and HTML encoding if needed.
893 *
894 * @param line
895 * the raw line
896 *
897 * @return the processed {@link Paragraph}
898 */
899 private Paragraph processPara(String line) {
900 line = ifUnhtml(line).trim();
901
902 boolean space = true;
903 boolean brk = true;
904 boolean quote = false;
905 boolean tentativeCloseQuote = false;
906 char prev = '\0';
907 int dashCount = 0;
908
909 StringBuilder builder = new StringBuilder();
910 for (char car : line.toCharArray()) {
911 if (car != '-') {
912 if (dashCount > 0) {
913 // dash, ndash and mdash: - – —
914 // currently: always use mdash
915 builder.append(dashCount == 1 ? '-' : '—');
916 }
917 dashCount = 0;
918 }
919
920 if (tentativeCloseQuote) {
921 tentativeCloseQuote = false;
922 if ((car >= 'a' && car <= 'z') || (car >= 'A' && car <= 'Z')
923 || (car >= '0' && car <= '9')) {
924 builder.append("'");
925 } else {
926 builder.append(closeQuote);
927 }
928 }
929
930 switch (car) {
931 case ' ': // note: unbreakable space
932 case ' ':
933 case '\t':
934 case '\n': // just in case
935 case '\r': // just in case
936 builder.append(' ');
937 break;
938
939 case '\'':
940 if (space || (brk && quote)) {
941 quote = true;
942 builder.append(openQuote);
943 } else if (prev == ' ') {
944 builder.append(openQuote);
945 } else {
946 // it is a quote ("I'm off") or a 'quote' ("This
947 // 'good' restaurant"...)
948 tentativeCloseQuote = true;
949 }
950 break;
951
952 case '"':
953 if (space || (brk && quote)) {
954 quote = true;
955 builder.append(openDoubleQuote);
956 } else if (prev == ' ') {
957 builder.append(openDoubleQuote);
958 } else {
959 builder.append(closeDoubleQuote);
960 }
961 break;
962
963 case '-':
964 if (space) {
965 quote = true;
966 } else {
967 dashCount++;
968 }
969 space = false;
970 break;
971
972 case '*':
973 case '~':
974 case '/':
975 case '\\':
976 case '<':
977 case '>':
978 case '=':
979 case '+':
980 case '_':
981 case '–':
982 case '—':
983 space = false;
984 builder.append(car);
985 break;
986
987 case '‘':
988 case '`':
989 case '‹':
990 case '﹁':
991 case '〈':
992 case '「':
993 if (space || (brk && quote)) {
994 quote = true;
995 builder.append(openQuote);
996 } else {
997 builder.append(openQuote);
998 }
999 space = false;
1000 brk = false;
1001 break;
1002
1003 case '’':
1004 case '›':
1005 case '﹂':
1006 case '〉':
1007 case '」':
1008 space = false;
1009 brk = false;
1010 builder.append(closeQuote);
1011 break;
1012
1013 case '«':
1014 case '“':
1015 case '﹃':
1016 case '《':
1017 case '『':
1018 if (space || (brk && quote)) {
1019 quote = true;
1020 builder.append(openDoubleQuote);
1021 } else {
1022 builder.append(openDoubleQuote);
1023 }
1024 space = false;
1025 brk = false;
1026 break;
1027
1028 case '»':
1029 case '”':
1030 case '﹄':
1031 case '》':
1032 case '』':
1033 space = false;
1034 brk = false;
1035 builder.append(closeDoubleQuote);
1036 break;
1037
1038 default:
1039 space = false;
1040 brk = false;
1041 builder.append(car);
1042 break;
1043 }
1044
1045 prev = car;
1046 }
1047
1048 if (tentativeCloseQuote) {
1049 tentativeCloseQuote = false;
1050 builder.append(closeQuote);
1051 }
1052
1053 line = builder.toString().trim();
1054
1055 ParagraphType type = ParagraphType.NORMAL;
1056 if (space) {
1057 type = ParagraphType.BLANK;
1058 } else if (brk) {
1059 type = ParagraphType.BREAK;
1060 } else if (quote) {
1061 type = ParagraphType.QUOTE;
1062 }
1063
1064 return new Paragraph(type, line);
1065 }
1066
1067 /**
1068 * Remove the HTML from the inpit <b>if</b> {@link BasicSupport#isHtml()} is
1069 * true.
1070 *
1071 * @param input
1072 * the input
1073 *
1074 * @return the no html version if needed
1075 */
1076 private String ifUnhtml(String input) {
1077 if (isHtml() && input != null) {
1078 return StringUtils.unhtml(input);
1079 }
1080
1081 return input;
1082 }
1083
1084 /**
1085 * Return a {@link BasicSupport} implementation supporting the given
1086 * resource if possible.
1087 *
1088 * @param url
1089 * the story resource
1090 *
1091 * @return an implementation that supports it, or NULL
1092 */
1093 public static BasicSupport getSupport(URL url) {
1094 if (url == null) {
1095 return null;
1096 }
1097
1098 // TEXT and INFO_TEXT always support files (not URLs though)
1099 for (SupportType type : SupportType.values()) {
1100 if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1101 BasicSupport support = getSupport(type);
1102 if (support != null && support.supports(url)) {
1103 return support;
1104 }
1105 }
1106 }
1107
1108 for (SupportType type : new SupportType[] { SupportType.TEXT,
1109 SupportType.INFO_TEXT }) {
1110 BasicSupport support = getSupport(type);
1111 if (support != null && support.supports(url)) {
1112 return support;
1113 }
1114 }
1115
1116 return null;
1117 }
1118
1119 /**
1120 * Return a {@link BasicSupport} implementation supporting the given type.
1121 *
1122 * @param type
1123 * the type
1124 *
1125 * @return an implementation that supports it, or NULL
1126 */
1127 public static BasicSupport getSupport(SupportType type) {
1128 switch (type) {
1129 case EPUB:
1130 return new Epub().setType(type);
1131 case INFO_TEXT:
1132 return new InfoText().setType(type);
1133 case FIMFICTION:
1134 return new Fimfiction().setType(type);
1135 case FANFICTION:
1136 return new Fanfiction().setType(type);
1137 case TEXT:
1138 return new Text().setType(type);
1139 case MANGAFOX:
1140 return new MangaFox().setType(type);
1141 case E621:
1142 return new E621().setType(type);
1143 case CBZ:
1144 return new Cbz().setType(type);
1145 }
1146
1147 return null;
1148 }
68686a37
NR
1149
1150 /**
1151 * Return the first line from the given input which correspond to the given
1152 * selectors.
1153 *
1154 * @param in
1155 * the input
1156 * @param needle
1157 * a string that must be found inside the target line (also
1158 * supports "^" at start to say "only if it starts with" the
1159 * needle)
1160 * @param relativeLine
1161 * the line to return based upon the target line position (-1 =
1162 * the line before, 0 = the target line...)
1163 *
1164 * @return the line
1165 */
1166 static String getLine(InputStream in, String needle, int relativeLine) {
1167 return getLine(in, needle, relativeLine, true);
1168 }
1169
1170 /**
1171 * Return a line from the given input which correspond to the given
1172 * selectors.
1173 *
1174 * @param in
1175 * the input
1176 * @param needle
1177 * a string that must be found inside the target line (also
1178 * supports "^" at start to say "only if it starts with" the
1179 * needle)
1180 * @param relativeLine
1181 * the line to return based upon the target line position (-1 =
1182 * the line before, 0 = the target line...)
1183 * @param first
1184 * takes the first result (as opposed to the last one, which will
1185 * also always spend the input)
1186 *
1187 * @return the line
1188 */
1189 static String getLine(InputStream in, String needle, int relativeLine,
1190 boolean first) {
1191 String rep = null;
1192
1193 try {
1194 in.reset();
1195 } catch (IOException e) {
1196 Instance.syserr(e);
1197 }
1198
1199 List<String> lines = new ArrayList<String>();
1200 @SuppressWarnings("resource")
1201 Scanner scan = new Scanner(in, "UTF-8");
1202 int index = -1;
1203 scan.useDelimiter("\\n");
1204 while (scan.hasNext()) {
1205 lines.add(scan.next());
1206
1207 if (index == -1) {
1208 if (needle.startsWith("^")) {
1209 if (lines.get(lines.size() - 1).startsWith(
1210 needle.substring(1))) {
1211 index = lines.size() - 1;
1212 }
1213
1214 } else {
1215 if (lines.get(lines.size() - 1).contains(needle)) {
1216 index = lines.size() - 1;
1217 }
1218 }
1219 }
1220
1221 if (index >= 0 && index + relativeLine < lines.size()) {
1222 rep = lines.get(index + relativeLine);
1223 if (first) {
1224 break;
1225 }
1226 }
1227 }
1228
1229 return rep;
1230 }
08fe2e33 1231}