Some jDoc fixes
[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / BasicSupport.java
CommitLineData
08fe2e33
NR
1package be.nikiroo.fanfix.supported;
2
68686a37 3import java.awt.image.BufferedImage;
68e370a4 4import java.io.BufferedReader;
08fe2e33
NR
5import java.io.ByteArrayInputStream;
6import java.io.File;
7import java.io.IOException;
8import java.io.InputStream;
68e370a4 9import java.io.InputStreamReader;
08fe2e33
NR
10import java.net.MalformedURLException;
11import java.net.URL;
08fe2e33 12import java.util.ArrayList;
793f1071 13import java.util.Date;
08fe2e33
NR
14import java.util.HashMap;
15import java.util.List;
16import java.util.Map;
17import java.util.Map.Entry;
18import java.util.Scanner;
19
20import be.nikiroo.fanfix.Instance;
21import be.nikiroo.fanfix.bundles.Config;
22import be.nikiroo.fanfix.bundles.StringId;
23import be.nikiroo.fanfix.data.Chapter;
24import be.nikiroo.fanfix.data.MetaData;
25import be.nikiroo.fanfix.data.Paragraph;
08fe2e33 26import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
9252c65e 27import be.nikiroo.fanfix.data.Story;
595dfa7a 28import be.nikiroo.utils.IOUtils;
3b2b638f 29import be.nikiroo.utils.Progress;
08fe2e33
NR
30import be.nikiroo.utils.StringUtils;
31
32/**
33 * This class is the base class used by the other support classes. It can be
34 * used outside of this package, and has static methods that you can use to get
35 * access to the correct support class.
36 * <p>
37 * It will be used with 'resources' (usually web pages or files).
38 *
39 * @author niki
40 */
41public abstract class BasicSupport {
42 /**
43 * The supported input types for which we can get a {@link BasicSupport}
44 * object.
45 *
46 * @author niki
47 */
48 public enum SupportType {
49 /** EPUB files created with this program */
50 EPUB,
51 /** Pure text file with some rules */
52 TEXT,
53 /** TEXT but with associated .info file */
54 INFO_TEXT,
55 /** My Little Pony fanfictions */
56 FIMFICTION,
57 /** Fanfictions from a lot of different universes */
58 FANFICTION,
59 /** Website with lots of Mangas */
60 MANGAFOX,
61 /** Furry website with comics support */
62 E621,
a4143cd7
NR
63 /** Furry website with stories */
64 YIFFSTAR,
f0608ab1
NR
65 /** Comics and images groups, mostly but not only NSFW */
66 E_HENTAI,
08fe2e33 67 /** CBZ files */
373da363
NR
68 CBZ,
69 /** HTML files */
70 HTML;
08fe2e33
NR
71
72 /**
73 * A description of this support type (more information than the
74 * {@link BasicSupport#getSourceName()}).
75 *
76 * @return the description
77 */
78 public String getDesc() {
79 String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
80 this.name());
81
82 if (desc == null) {
83 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
84 }
85
86 return desc;
87 }
88
89 /**
90 * The name of this support type (a short version).
91 *
92 * @return the name
93 */
94 public String getSourceName() {
95 BasicSupport support = BasicSupport.getSupport(this);
96 if (support != null) {
97 return support.getSourceName();
98 }
99
100 return null;
101 }
102
103 @Override
104 public String toString() {
105 return super.toString().toLowerCase();
106 }
107
108 /**
0efd25e3
NR
109 * Call {@link SupportType#valueOf(String)} after conversion to upper
110 * case.
08fe2e33
NR
111 *
112 * @param typeName
113 * the possible type name
114 *
115 * @return NULL or the type
116 */
117 public static SupportType valueOfUC(String typeName) {
118 return SupportType.valueOf(typeName == null ? null : typeName
119 .toUpperCase());
120 }
121
122 /**
0efd25e3
NR
123 * Call {@link SupportType#valueOf(String)} after conversion to upper
124 * case but return NULL for NULL instead of raising exception.
08fe2e33
NR
125 *
126 * @param typeName
127 * the possible type name
128 *
129 * @return NULL or the type
130 */
131 public static SupportType valueOfNullOkUC(String typeName) {
132 if (typeName == null) {
133 return null;
134 }
135
136 return SupportType.valueOfUC(typeName);
137 }
138
139 /**
0efd25e3
NR
140 * Call {@link SupportType#valueOf(String)} after conversion to upper
141 * case but return NULL in case of error instead of raising an
142 * exception.
08fe2e33
NR
143 *
144 * @param typeName
145 * the possible type name
146 *
147 * @return NULL or the type
148 */
149 public static SupportType valueOfAllOkUC(String typeName) {
150 try {
151 return SupportType.valueOfUC(typeName);
152 } catch (Exception e) {
153 return null;
154 }
155 }
156 }
157
08fe2e33
NR
158 private InputStream in;
159 private SupportType type;
22848428 160 private URL currentReferer; // with only one 'r', as in 'HTTP'...
08fe2e33
NR
161
162 // quote chars
e8eeea0a 163 private char openQuote = Instance.getTrans().getCharacter(
08fe2e33 164 StringId.OPEN_SINGLE_QUOTE);
e8eeea0a 165 private char closeQuote = Instance.getTrans().getCharacter(
08fe2e33 166 StringId.CLOSE_SINGLE_QUOTE);
e8eeea0a 167 private char openDoubleQuote = Instance.getTrans().getCharacter(
08fe2e33 168 StringId.OPEN_DOUBLE_QUOTE);
e8eeea0a 169 private char closeDoubleQuote = Instance.getTrans().getCharacter(
08fe2e33
NR
170 StringId.CLOSE_DOUBLE_QUOTE);
171
172 /**
173 * The name of this support class.
174 *
175 * @return the name
176 */
177 protected abstract String getSourceName();
178
179 /**
180 * Check if the given resource is supported by this {@link BasicSupport}.
181 *
182 * @param url
183 * the resource to check for
184 *
185 * @return TRUE if it is
186 */
187 protected abstract boolean supports(URL url);
188
189 /**
190 * Return TRUE if the support will return HTML encoded content values for
191 * the chapters content.
192 *
193 * @return TRUE for HTML
194 */
195 protected abstract boolean isHtml();
196
0efd25e3
NR
197 /**
198 * Return the {@link MetaData} of this story.
199 *
200 * @param source
201 * the source of the story
202 * @param in
203 * the input (the main resource)
204 *
205 * @return the associated {@link MetaData}
206 *
207 * @throws IOException
208 * in case of I/O error
209 */
68686a37 210 protected abstract MetaData getMeta(URL source, InputStream in)
08fe2e33
NR
211 throws IOException;
212
213 /**
214 * Return the story description.
215 *
216 * @param source
217 * the source of the story
218 * @param in
219 * the input (the main resource)
220 *
221 * @return the description
222 *
223 * @throws IOException
224 * in case of I/O error
225 */
226 protected abstract String getDesc(URL source, InputStream in)
227 throws IOException;
228
08fe2e33
NR
229 /**
230 * Return the list of chapters (name and resource).
231 *
232 * @param source
233 * the source of the story
234 * @param in
235 * the input (the main resource)
ed08c171
NR
236 * @param pg
237 * the optional progress reporter
08fe2e33
NR
238 *
239 * @return the chapters
240 *
241 * @throws IOException
242 * in case of I/O error
243 */
244 protected abstract List<Entry<String, URL>> getChapters(URL source,
ed08c171 245 InputStream in, Progress pg) throws IOException;
08fe2e33
NR
246
247 /**
248 * Return the content of the chapter (possibly HTML encoded, if
249 * {@link BasicSupport#isHtml()} is TRUE).
250 *
251 * @param source
252 * the source of the story
253 * @param in
254 * the input (the main resource)
255 * @param number
256 * the chapter number
ed08c171
NR
257 * @param pg
258 * the optional progress reporter
08fe2e33
NR
259 *
260 * @return the content
261 *
262 * @throws IOException
263 * in case of I/O error
264 */
265 protected abstract String getChapterContent(URL source, InputStream in,
ed08c171 266 int number, Progress pg) throws IOException;
08fe2e33 267
6e06d2cc
NR
268 /**
269 * Log into the support (can be a no-op depending upon the support).
270 *
271 * @throws IOException
272 * in case of I/O error
273 */
274 public void login() throws IOException {
275
276 }
277
08fe2e33
NR
278 /**
279 * Return the list of cookies (values included) that must be used to
280 * correctly fetch the resources.
281 * <p>
282 * You are expected to call the super method implementation if you override
283 * it.
284 *
285 * @return the cookies
6e06d2cc
NR
286 *
287 * @throws IOException
288 * in case of I/O error
08fe2e33 289 */
6e06d2cc 290 public Map<String, String> getCookies() throws IOException {
08fe2e33
NR
291 return new HashMap<String, String>();
292 }
293
a4143cd7
NR
294 /**
295 * Return the canonical form of the main {@link URL}.
296 *
297 * @param source
298 * the source {@link URL}
299 *
300 * @return the canonical form of this {@link URL}
301 *
302 * @throws IOException
303 * in case of I/O error
304 */
305 public URL getCanonicalUrl(URL source) throws IOException {
306 return source;
307 }
308
08fe2e33
NR
309 /**
310 * Process the given story resource into a partially filled {@link Story}
311 * object containing the name and metadata, except for the description.
312 *
313 * @param url
314 * the story resource
315 *
316 * @return the {@link Story}
317 *
318 * @throws IOException
319 * in case of I/O error
320 */
321 public Story processMeta(URL url) throws IOException {
ed08c171 322 return processMeta(url, true, false, null);
08fe2e33
NR
323 }
324
325 /**
326 * Process the given story resource into a partially filled {@link Story}
327 * object containing the name and metadata.
328 *
329 * @param url
330 * the story resource
08fe2e33
NR
331 * @param close
332 * close "this" and "in" when done
0efd25e3
NR
333 * @param getDesc
334 * retrieve the description of the story, or not
ed08c171
NR
335 * @param pg
336 * the optional progress reporter
08fe2e33
NR
337 *
338 * @return the {@link Story}
339 *
340 * @throws IOException
341 * in case of I/O error
342 */
ed08c171
NR
343 protected Story processMeta(URL url, boolean close, boolean getDesc,
344 Progress pg) throws IOException {
345 if (pg == null) {
346 pg = new Progress();
347 } else {
348 pg.setMinMax(0, 100);
349 }
350
6e06d2cc 351 login();
ed08c171 352 pg.setProgress(10);
6e06d2cc 353
a4143cd7
NR
354 url = getCanonicalUrl(url);
355
356 setCurrentReferer(url);
357
373da363 358 in = openInput(url);
08fe2e33
NR
359 if (in == null) {
360 return null;
361 }
362
363 try {
68686a37 364 preprocess(url, getInput());
ed08c171 365 pg.setProgress(30);
08fe2e33
NR
366
367 Story story = new Story();
68686a37 368 MetaData meta = getMeta(url, getInput());
793f1071
NR
369 if (meta.getCreationDate() == null
370 || meta.getCreationDate().isEmpty()) {
371 meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
372 }
68686a37
NR
373 story.setMeta(meta);
374
ed08c171
NR
375 pg.setProgress(50);
376
68686a37
NR
377 if (meta != null && meta.getCover() == null) {
378 meta.setCover(getDefaultCover(meta.getSubject()));
379 }
08fe2e33 380
ed08c171
NR
381 pg.setProgress(60);
382
08fe2e33
NR
383 if (getDesc) {
384 String descChapterName = Instance.getTrans().getString(
385 StringId.DESCRIPTION);
386 story.getMeta().setResume(
387 makeChapter(url, 0, descChapterName,
ed08c171 388 getDesc(url, getInput()), null));
08fe2e33
NR
389 }
390
ed08c171 391 pg.setProgress(100);
08fe2e33
NR
392 return story;
393 } finally {
394 if (close) {
395 try {
396 close();
397 } catch (IOException e) {
398 Instance.syserr(e);
399 }
400
401 if (in != null) {
402 in.close();
403 }
404 }
a4143cd7
NR
405
406 setCurrentReferer(null);
08fe2e33
NR
407 }
408 }
409
410 /**
411 * Process the given story resource into a fully filled {@link Story}
412 * object.
413 *
414 * @param url
415 * the story resource
92fb0719
NR
416 * @param pg
417 * the optional progress reporter
08fe2e33
NR
418 *
419 * @return the {@link Story}
420 *
421 * @throws IOException
422 * in case of I/O error
423 */
92fb0719
NR
424 public Story process(URL url, Progress pg) throws IOException {
425 if (pg == null) {
426 pg = new Progress();
427 } else {
428 pg.setMinMax(0, 100);
429 }
430
a4143cd7 431 url = getCanonicalUrl(url);
92fb0719 432 pg.setProgress(1);
08fe2e33 433 try {
ed08c171
NR
434 Progress pgMeta = new Progress();
435 pg.addProgress(pgMeta, 10);
436 Story story = processMeta(url, false, true, pgMeta);
437 if (!pgMeta.isDone()) {
438 pgMeta.setProgress(pgMeta.getMax()); // 10%
439 }
440
08fe2e33 441 if (story == null) {
ed08c171 442 pg.setProgress(90);
08fe2e33
NR
443 return null;
444 }
445
754a5bc2
NR
446 pg.setName("Retrieving " + story.getMeta().getTitle());
447
a4143cd7
NR
448 setCurrentReferer(url);
449
ed08c171
NR
450 Progress pgGetChapters = new Progress();
451 pg.addProgress(pgGetChapters, 10);
08fe2e33 452 story.setChapters(new ArrayList<Chapter>());
ed08c171
NR
453 List<Entry<String, URL>> chapters = getChapters(url, getInput(),
454 pgGetChapters);
455 if (!pgGetChapters.isDone()) {
456 pgGetChapters.setProgress(pgGetChapters.getMax()); // 20%
457 }
08fe2e33 458
08fe2e33 459 if (chapters != null) {
ed08c171
NR
460 Progress pgChaps = new Progress("Extracting chapters", 0,
461 chapters.size() * 300);
92fb0719
NR
462 pg.addProgress(pgChaps, 80);
463
793f1071 464 long words = 0;
ed08c171 465 int i = 1;
08fe2e33 466 for (Entry<String, URL> chap : chapters) {
ed08c171 467 pgChaps.setName("Extracting chapter " + i);
08fe2e33
NR
468 setCurrentReferer(chap.getValue());
469 InputStream chapIn = Instance.getCache().open(
470 chap.getValue(), this, true);
ed08c171 471 pgChaps.setProgress(i * 100);
08fe2e33 472 try {
ed08c171
NR
473 Progress pgGetChapterContent = new Progress();
474 Progress pgMakeChapter = new Progress();
475 pgChaps.addProgress(pgGetChapterContent, 100);
476 pgChaps.addProgress(pgMakeChapter, 100);
477
478 String content = getChapterContent(url, chapIn, i,
479 pgGetChapterContent);
480 if (!pgGetChapterContent.isDone()) {
481 pgGetChapterContent.setProgress(pgGetChapterContent
482 .getMax());
483 }
484
793f1071 485 Chapter cc = makeChapter(url, i, chap.getKey(),
ed08c171
NR
486 content, pgMakeChapter);
487 if (!pgMakeChapter.isDone()) {
488 pgMakeChapter.setProgress(pgMakeChapter.getMax());
489 }
490
793f1071
NR
491 words += cc.getWords();
492 story.getChapters().add(cc);
493 if (story.getMeta() != null) {
494 story.getMeta().setWords(words);
495 }
08fe2e33
NR
496 } finally {
497 chapIn.close();
498 }
a6395bef 499
ed08c171 500 i++;
08fe2e33 501 }
ed08c171
NR
502
503 pgChaps.setName("Extracting chapters");
92fb0719 504 } else {
ed08c171 505 pg.setProgress(80);
08fe2e33
NR
506 }
507
508 return story;
509
510 } finally {
511 try {
512 close();
513 } catch (IOException e) {
514 Instance.syserr(e);
515 }
516
517 if (in != null) {
518 in.close();
519 }
520
a4143cd7 521 setCurrentReferer(null);
08fe2e33
NR
522 }
523 }
524
525 /**
a4143cd7 526 * The support type.
08fe2e33
NR
527 *
528 * @return the type
529 */
530 public SupportType getType() {
531 return type;
532 }
533
534 /**
535 * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
536 * the current {@link URL} we work on.
537 *
538 * @return the referer
539 */
540 public URL getCurrentReferer() {
541 return currentReferer;
542 }
543
544 /**
545 * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
546 * the current {@link URL} we work on.
547 *
548 * @param currentReferer
549 * the new referer
550 */
551 protected void setCurrentReferer(URL currentReferer) {
552 this.currentReferer = currentReferer;
553 }
554
555 /**
556 * The support type.
557 *
558 * @param type
559 * the new type
560 *
561 * @return this
562 */
563 protected BasicSupport setType(SupportType type) {
564 this.type = type;
565 return this;
566 }
567
568 /**
68686a37 569 * Prepare the support if needed before processing.
08fe2e33
NR
570 *
571 * @param source
572 * the source of the story
573 * @param in
574 * the input (the main resource)
575 *
08fe2e33
NR
576 * @throws IOException
577 * on I/O error
578 */
68686a37 579 protected void preprocess(URL source, InputStream in) throws IOException {
08fe2e33
NR
580 }
581
582 /**
583 * Now that we have processed the {@link Story}, close the resources if any.
584 *
585 * @throws IOException
586 * on I/O error
587 */
588 protected void close() throws IOException {
589 }
590
591 /**
592 * Create a {@link Chapter} object from the given information, formatting
593 * the content as it should be.
594 *
0efd25e3
NR
595 * @param source
596 * the source of the story
08fe2e33
NR
597 * @param number
598 * the chapter number
599 * @param name
600 * the chapter name
601 * @param content
602 * the chapter content
ed08c171
NR
603 * @param pg
604 * the optional progress reporter
08fe2e33
NR
605 *
606 * @return the {@link Chapter}
607 *
608 * @throws IOException
609 * in case of I/O error
610 */
611 protected Chapter makeChapter(URL source, int number, String name,
ed08c171 612 String content, Progress pg) throws IOException {
08fe2e33 613 // Chapter name: process it correctly, then remove the possible
f60df2f1
NR
614 // redundant "Chapter x: " in front of it, or "-" (as in
615 // "Chapter 5: - Fun!" after the ": " was automatically added)
08fe2e33
NR
616 String chapterName = processPara(name).getContent().trim();
617 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
618 .split(",")) {
619 String chapterWord = Instance.getConfig().getStringX(
620 Config.CHAPTER, lang);
621 if (chapterName.startsWith(chapterWord)) {
622 chapterName = chapterName.substring(chapterWord.length())
623 .trim();
624 break;
625 }
626 }
627
628 if (chapterName.startsWith(Integer.toString(number))) {
629 chapterName = chapterName.substring(
630 Integer.toString(number).length()).trim();
631 }
632
f60df2f1 633 while (chapterName.startsWith(":") || chapterName.startsWith("-")) {
08fe2e33
NR
634 chapterName = chapterName.substring(1).trim();
635 }
636 //
637
638 Chapter chap = new Chapter(number, chapterName);
639
68e370a4 640 if (content != null) {
ed08c171 641 List<Paragraph> paras = makeParagraphs(source, content, pg);
793f1071
NR
642 long words = 0;
643 for (Paragraph para : paras) {
644 words += para.getWords();
645 }
646 chap.setParagraphs(paras);
647 chap.setWords(words);
08fe2e33
NR
648 }
649
68e370a4
NR
650 return chap;
651
652 }
653
654 /**
655 * Convert the given content into {@link Paragraph}s.
656 *
657 * @param source
658 * the source URL of the story
659 * @param content
660 * the textual content
ed08c171
NR
661 * @param pg
662 * the optional progress reporter
68e370a4
NR
663 *
664 * @return the {@link Paragraph}s
665 *
666 * @throws IOException
667 * in case of I/O error
668 */
ed08c171
NR
669 protected List<Paragraph> makeParagraphs(URL source, String content,
670 Progress pg) throws IOException {
671 if (pg == null) {
672 pg = new Progress();
673 }
674
08fe2e33
NR
675 if (isHtml()) {
676 // Special <HR> processing:
677 content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
d5a7153c 678 "<br/>* * *<br/>");
08fe2e33
NR
679 }
680
68e370a4 681 List<Paragraph> paras = new ArrayList<Paragraph>();
08fe2e33 682
d5a7153c
NR
683 if (content != null && !content.trim().isEmpty()) {
684 if (isHtml()) {
ed08c171
NR
685 String[] tab = content.split("(<p>|</p>|<br>|<br/>)");
686 pg.setMinMax(0, tab.length);
687 int i = 1;
688 for (String line : tab) {
689 if (line.startsWith("[") && line.endsWith("]")) {
690 pg.setName("Extracting image " + i);
691 }
d5a7153c 692 paras.add(makeParagraph(source, line.trim()));
ed08c171 693 pg.setProgress(i++);
d5a7153c 694 }
ed08c171 695 pg.setName(null);
d5a7153c 696 } else {
ed08c171 697 List<String> lines = new ArrayList<String>();
d5a7153c
NR
698 BufferedReader buff = null;
699 try {
700 buff = new BufferedReader(
701 new InputStreamReader(new ByteArrayInputStream(
702 content.getBytes("UTF-8")), "UTF-8"));
703 for (String line = buff.readLine(); line != null; line = buff
704 .readLine()) {
ed08c171 705 lines.add(line.trim());
68e370a4 706 }
d5a7153c
NR
707 } finally {
708 if (buff != null) {
709 buff.close();
68e370a4 710 }
08fe2e33 711 }
ed08c171
NR
712
713 pg.setMinMax(0, lines.size());
714 int i = 0;
715 for (String line : lines) {
716 if (line.startsWith("[") && line.endsWith("]")) {
717 pg.setName("Extracting image " + i);
718 }
719 paras.add(makeParagraph(source, line));
720 pg.setProgress(i++);
721 }
722 pg.setName(null);
08fe2e33
NR
723 }
724
d5a7153c
NR
725 // Check quotes for "bad" format
726 List<Paragraph> newParas = new ArrayList<Paragraph>();
727 for (Paragraph para : paras) {
728 newParas.addAll(requotify(para));
729 }
730 paras = newParas;
08fe2e33 731
d5a7153c
NR
732 // Remove double blanks/brks
733 fixBlanksBreaks(paras);
734 }
08fe2e33 735
68e370a4
NR
736 return paras;
737 }
08fe2e33 738
d5a7153c
NR
739 /**
740 * Convert the given line into a single {@link Paragraph}.
741 *
742 * @param source
743 * the source URL of the story
744 * @param line
745 * the textual content of the paragraph
746 *
747 * @return the {@link Paragraph}
748 */
749 private Paragraph makeParagraph(URL source, String line) {
750 URL image = null;
751 if (line.startsWith("[") && line.endsWith("]")) {
752 image = getImageUrl(this, source,
753 line.substring(1, line.length() - 1).trim());
754 }
755
756 if (image != null) {
757 return new Paragraph(image);
758 } else {
759 return processPara(line);
760 }
761 }
762
68e370a4
NR
763 /**
764 * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
765 * those {@link Paragraph}s.
766 * <p>
767 * The resulting list will not contain a starting or trailing blank/break
768 * nor 2 blanks or breaks following each other.
769 *
770 * @param paras
771 * the list of {@link Paragraph}s to fix
772 */
773 protected void fixBlanksBreaks(List<Paragraph> paras) {
774 boolean space = false;
775 boolean brk = true;
776 for (int i = 0; i < paras.size(); i++) {
777 Paragraph para = paras.get(i);
778 boolean thisSpace = para.getType() == ParagraphType.BLANK;
779 boolean thisBrk = para.getType() == ParagraphType.BREAK;
780
781 if (i > 0 && space && thisBrk) {
782 paras.remove(i - 1);
783 i--;
784 } else if ((space || brk) && (thisSpace || thisBrk)) {
785 paras.remove(i);
786 i--;
08fe2e33
NR
787 }
788
68e370a4
NR
789 space = thisSpace;
790 brk = thisBrk;
791 }
08fe2e33 792
68e370a4
NR
793 // Remove blank/brk at start
794 if (paras.size() > 0
795 && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
796 0).getType() == ParagraphType.BREAK)) {
797 paras.remove(0);
798 }
799
800 // Remove blank/brk at end
801 int last = paras.size() - 1;
802 if (paras.size() > 0
803 && (paras.get(last).getType() == ParagraphType.BLANK || paras
804 .get(last).getType() == ParagraphType.BREAK)) {
805 paras.remove(last);
08fe2e33
NR
806 }
807 }
808
68e370a4
NR
809 /**
810 * Get the default cover related to this subject (see <tt>.info</tt> files).
811 *
812 * @param subject
813 * the subject
814 *
815 * @return the cover if any, or NULL
816 */
68686a37
NR
817 static BufferedImage getDefaultCover(String subject) {
818 if (subject != null && !subject.isEmpty()
819 && Instance.getCoverDir() != null) {
820 try {
821 File fileCover = new File(Instance.getCoverDir(), subject);
333f0e7b 822 return getImage(null, fileCover.toURI().toURL(), subject);
68686a37
NR
823 } catch (MalformedURLException e) {
824 }
825 }
826
827 return null;
828 }
829
08fe2e33
NR
830 /**
831 * Return the list of supported image extensions.
832 *
a4143cd7
NR
833 * @param emptyAllowed
834 * TRUE to allow an empty extension on first place, which can be
835 * used when you may already have an extension in your input but
836 * are not sure about it
837 *
08fe2e33
NR
838 * @return the extensions
839 */
68686a37 840 static String[] getImageExt(boolean emptyAllowed) {
08fe2e33
NR
841 if (emptyAllowed) {
842 return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
843 } else {
844 return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
845 }
846 }
847
a4143cd7
NR
848 /**
849 * Check if the given resource can be a local image or a remote image, then
850 * refresh the cache with it if it is.
851 *
852 * @param source
853 * the story source
854 * @param line
855 * the resource to check
856 *
857 * @return the image if found, or NULL
858 *
859 */
333f0e7b
NR
860 static BufferedImage getImage(BasicSupport support, URL source, String line) {
861 URL url = getImageUrl(support, source, line);
68686a37
NR
862 if (url != null) {
863 InputStream in = null;
864 try {
865 in = Instance.getCache().open(url, getSupport(url), true);
595dfa7a 866 return IOUtils.toImage(in);
68686a37
NR
867 } catch (IOException e) {
868 } finally {
869 if (in != null) {
870 try {
871 in.close();
872 } catch (IOException e) {
873 }
874 }
875 }
876 }
877
878 return null;
879 }
880
08fe2e33
NR
881 /**
882 * Check if the given resource can be a local image or a remote image, then
883 * refresh the cache with it if it is.
884 *
885 * @param source
886 * the story source
887 * @param line
888 * the resource to check
889 *
890 * @return the image URL if found, or NULL
891 *
892 */
333f0e7b 893 static URL getImageUrl(BasicSupport support, URL source, String line) {
08fe2e33
NR
894 URL url = null;
895
68686a37
NR
896 if (line != null) {
897 // try for files
68686a37 898 if (source != null) {
68686a37 899 try {
2ab2e40a
NR
900
901 String relPath = null;
902 String absPath = null;
903 try {
904 String path = new File(source.getFile()).getParent();
905 relPath = new File(new File(path), line.trim())
906 .getAbsolutePath();
907 } catch (Exception e) {
908 // Cannot be converted to path (one possibility to take
909 // into account: absolute path on Windows)
910 }
911 try {
912 absPath = new File(line.trim()).getAbsolutePath();
913 } catch (Exception e) {
914 // Cannot be converted to path (at all)
915 }
916
68686a37 917 for (String ext : getImageExt(true)) {
2ab2e40a
NR
918 if (absPath != null && new File(absPath + ext).exists()) {
919 url = new File(absPath + ext).toURI().toURL();
920 } else if (relPath != null
921 && new File(relPath + ext).exists()) {
922 url = new File(relPath + ext).toURI().toURL();
68686a37 923 }
08fe2e33 924 }
68686a37 925 } catch (Exception e) {
2ab2e40a 926 // Should not happen since we control the correct arguments
08fe2e33 927 }
68686a37 928 }
08fe2e33 929
68686a37
NR
930 if (url == null) {
931 // try for URLs
932 try {
08fe2e33 933 for (String ext : getImageExt(true)) {
68686a37 934 if (Instance.getCache().check(new URL(line + ext))) {
08fe2e33 935 url = new URL(line + ext);
333f0e7b 936 break;
08fe2e33
NR
937 }
938 }
68686a37
NR
939
940 // try out of cache
941 if (url == null) {
942 for (String ext : getImageExt(true)) {
943 try {
944 url = new URL(line + ext);
333f0e7b 945 Instance.getCache().refresh(url, support, true);
68686a37
NR
946 break;
947 } catch (IOException e) {
948 // no image with this ext
949 url = null;
950 }
951 }
952 }
953 } catch (MalformedURLException e) {
954 // Not an url
08fe2e33 955 }
08fe2e33 956 }
08fe2e33 957
68686a37
NR
958 // refresh the cached file
959 if (url != null) {
960 try {
333f0e7b 961 Instance.getCache().refresh(url, support, true);
68686a37
NR
962 } catch (IOException e) {
963 // woops, broken image
964 url = null;
965 }
08fe2e33
NR
966 }
967 }
968
969 return url;
970 }
971
373da363
NR
972 /**
973 * Open the input file that will be used through the support.
974 *
975 * @param source
976 * the source {@link URL}
977 *
978 * @return the {@link InputStream}
979 *
980 * @throws IOException
981 * in case of I/O error
982 */
983 protected InputStream openInput(URL source) throws IOException {
984 return Instance.getCache().open(source, this, false);
985 }
986
a4143cd7
NR
987 /**
988 * Reset the given {@link InputStream} and return it.
989 *
990 * @param in
991 * the {@link InputStream} to reset
992 *
993 * @return the same {@link InputStream} after reset
994 */
68686a37
NR
995 protected InputStream reset(InputStream in) {
996 try {
997 in.reset();
998 } catch (IOException e) {
999 }
1000 return in;
1001 }
1002
08fe2e33
NR
1003 /**
1004 * Reset then return {@link BasicSupport#in}.
1005 *
1006 * @return {@link BasicSupport#in}
08fe2e33 1007 */
68686a37
NR
1008 protected InputStream getInput() {
1009 return reset(in);
08fe2e33
NR
1010 }
1011
1012 /**
1013 * Fix the author name if it is prefixed with some "by" {@link String}.
1014 *
1015 * @param author
1016 * the author with a possible prefix
1017 *
1018 * @return the author without prefixes
1019 */
68686a37 1020 protected String fixAuthor(String author) {
08fe2e33
NR
1021 if (author != null) {
1022 for (String suffix : new String[] { " ", ":" }) {
1023 for (String byString : Instance.getConfig()
1024 .getString(Config.BYS).split(",")) {
1025 byString += suffix;
1026 if (author.toUpperCase().startsWith(byString.toUpperCase())) {
1027 author = author.substring(byString.length()).trim();
1028 }
1029 }
1030 }
1031
1032 // Special case (without suffix):
1033 if (author.startsWith("©")) {
1034 author = author.substring(1);
1035 }
1036 }
1037
1038 return author;
1039 }
1040
1041 /**
1042 * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
1043 * and requotify them (i.e., separate them into QUOTE paragraphs and other
1044 * paragraphs (quotes or not)).
1045 *
1046 * @param para
a4143cd7 1047 * the paragraph to requotify (not necessarily a quote)
08fe2e33
NR
1048 *
1049 * @return the correctly (or so we hope) quotified paragraphs
1050 */
68e370a4 1051 protected List<Paragraph> requotify(Paragraph para) {
08fe2e33
NR
1052 List<Paragraph> newParas = new ArrayList<Paragraph>();
1053
68686a37
NR
1054 if (para.getType() == ParagraphType.QUOTE
1055 && para.getContent().length() > 2) {
08fe2e33
NR
1056 String line = para.getContent();
1057 boolean singleQ = line.startsWith("" + openQuote);
1058 boolean doubleQ = line.startsWith("" + openDoubleQuote);
1059
b4dc6ab5
NR
1060 // Do not try when more than one quote at a time
1061 // (some stories are not easily readable if we do)
1062 if (singleQ
1063 && line.indexOf(closeQuote, 1) < line
1064 .lastIndexOf(closeQuote)) {
1065 newParas.add(para);
1066 return newParas;
1067 }
1068 if (doubleQ
1069 && line.indexOf(closeDoubleQuote, 1) < line
1070 .lastIndexOf(closeDoubleQuote)) {
1071 newParas.add(para);
1072 return newParas;
1073 }
1074 //
1075
08fe2e33
NR
1076 if (!singleQ && !doubleQ) {
1077 line = openDoubleQuote + line + closeDoubleQuote;
793f1071
NR
1078 newParas.add(new Paragraph(ParagraphType.QUOTE, line, para
1079 .getWords()));
08fe2e33 1080 } else {
a6395bef 1081 char open = singleQ ? openQuote : openDoubleQuote;
08fe2e33 1082 char close = singleQ ? closeQuote : closeDoubleQuote;
a6395bef
NR
1083
1084 int posDot = -1;
1085 boolean inQuote = false;
1086 int i = 0;
1087 for (char car : line.toCharArray()) {
1088 if (car == open) {
1089 inQuote = true;
1090 } else if (car == close) {
1091 inQuote = false;
1092 } else if (car == '.' && !inQuote) {
1093 posDot = i;
1094 break;
1095 }
1096 i++;
08fe2e33
NR
1097 }
1098
1099 if (posDot >= 0) {
1100 String rest = line.substring(posDot + 1).trim();
1101 line = line.substring(0, posDot + 1).trim();
793f1071
NR
1102 long words = 1;
1103 for (char car : line.toCharArray()) {
1104 if (car == ' ') {
1105 words++;
1106 }
1107 }
1108 newParas.add(new Paragraph(ParagraphType.QUOTE, line, words));
68686a37
NR
1109 if (!rest.isEmpty()) {
1110 newParas.addAll(requotify(processPara(rest)));
1111 }
08fe2e33
NR
1112 } else {
1113 newParas.add(para);
1114 }
1115 }
1116 } else {
1117 newParas.add(para);
1118 }
1119
1120 return newParas;
1121 }
1122
1123 /**
1124 * Process a {@link Paragraph} from a raw line of text.
1125 * <p>
1126 * Will also fix quotes and HTML encoding if needed.
1127 *
1128 * @param line
1129 * the raw line
1130 *
1131 * @return the processed {@link Paragraph}
1132 */
22848428 1133 protected Paragraph processPara(String line) {
08fe2e33
NR
1134 line = ifUnhtml(line).trim();
1135
1136 boolean space = true;
1137 boolean brk = true;
1138 boolean quote = false;
1139 boolean tentativeCloseQuote = false;
1140 char prev = '\0';
1141 int dashCount = 0;
793f1071 1142 long words = 1;
08fe2e33
NR
1143
1144 StringBuilder builder = new StringBuilder();
1145 for (char car : line.toCharArray()) {
1146 if (car != '-') {
1147 if (dashCount > 0) {
1148 // dash, ndash and mdash: - – —
1149 // currently: always use mdash
1150 builder.append(dashCount == 1 ? '-' : '—');
1151 }
1152 dashCount = 0;
1153 }
1154
1155 if (tentativeCloseQuote) {
1156 tentativeCloseQuote = false;
22848428 1157 if (Character.isLetterOrDigit(car)) {
08fe2e33
NR
1158 builder.append("'");
1159 } else {
22848428
NR
1160 // handle double-single quotes as double quotes
1161 if (prev == car) {
1162 builder.append(closeDoubleQuote);
1163 continue;
1164 } else {
1165 builder.append(closeQuote);
1166 }
08fe2e33
NR
1167 }
1168 }
1169
1170 switch (car) {
1171 case ' ': // note: unbreakable space
1172 case ' ':
1173 case '\t':
1174 case '\n': // just in case
1175 case '\r': // just in case
793f1071
NR
1176 if (builder.length() > 0
1177 && builder.charAt(builder.length() - 1) != ' ') {
1178 words++;
1179 }
08fe2e33
NR
1180 builder.append(' ');
1181 break;
1182
1183 case '\'':
1184 if (space || (brk && quote)) {
1185 quote = true;
22848428
NR
1186 // handle double-single quotes as double quotes
1187 if (prev == car) {
1188 builder.deleteCharAt(builder.length() - 1);
1189 builder.append(openDoubleQuote);
1190 } else {
1191 builder.append(openQuote);
1192 }
1193 } else if (prev == ' ' || prev == car) {
1194 // handle double-single quotes as double quotes
1195 if (prev == car) {
1196 builder.deleteCharAt(builder.length() - 1);
1197 builder.append(openDoubleQuote);
1198 } else {
1199 builder.append(openQuote);
1200 }
08fe2e33
NR
1201 } else {
1202 // it is a quote ("I'm off") or a 'quote' ("This
1203 // 'good' restaurant"...)
1204 tentativeCloseQuote = true;
1205 }
1206 break;
1207
1208 case '"':
1209 if (space || (brk && quote)) {
1210 quote = true;
1211 builder.append(openDoubleQuote);
1212 } else if (prev == ' ') {
1213 builder.append(openDoubleQuote);
1214 } else {
1215 builder.append(closeDoubleQuote);
1216 }
1217 break;
1218
1219 case '-':
1220 if (space) {
1221 quote = true;
1222 } else {
1223 dashCount++;
1224 }
1225 space = false;
1226 break;
1227
1228 case '*':
1229 case '~':
1230 case '/':
1231 case '\\':
1232 case '<':
1233 case '>':
1234 case '=':
1235 case '+':
1236 case '_':
1237 case '–':
1238 case '—':
1239 space = false;
1240 builder.append(car);
1241 break;
1242
1243 case '‘':
1244 case '`':
1245 case '‹':
1246 case '﹁':
1247 case '〈':
1248 case '「':
1249 if (space || (brk && quote)) {
1250 quote = true;
1251 builder.append(openQuote);
1252 } else {
22848428
NR
1253 // handle double-single quotes as double quotes
1254 if (prev == car) {
1255 builder.deleteCharAt(builder.length() - 1);
1256 builder.append(openDoubleQuote);
1257 } else {
1258 builder.append(openQuote);
1259 }
08fe2e33
NR
1260 }
1261 space = false;
1262 brk = false;
1263 break;
1264
1265 case '’':
1266 case '›':
1267 case '﹂':
1268 case '〉':
1269 case '」':
1270 space = false;
1271 brk = false;
22848428
NR
1272 // handle double-single quotes as double quotes
1273 if (prev == car) {
1274 builder.deleteCharAt(builder.length() - 1);
1275 builder.append(closeDoubleQuote);
1276 } else {
1277 builder.append(closeQuote);
1278 }
08fe2e33
NR
1279 break;
1280
1281 case '«':
1282 case '“':
1283 case '﹃':
1284 case '《':
1285 case '『':
1286 if (space || (brk && quote)) {
1287 quote = true;
1288 builder.append(openDoubleQuote);
1289 } else {
1290 builder.append(openDoubleQuote);
1291 }
1292 space = false;
1293 brk = false;
1294 break;
1295
1296 case '»':
1297 case '”':
1298 case '﹄':
1299 case '》':
1300 case '』':
1301 space = false;
1302 brk = false;
1303 builder.append(closeDoubleQuote);
1304 break;
1305
1306 default:
1307 space = false;
1308 brk = false;
1309 builder.append(car);
1310 break;
1311 }
1312
1313 prev = car;
1314 }
1315
1316 if (tentativeCloseQuote) {
1317 tentativeCloseQuote = false;
1318 builder.append(closeQuote);
1319 }
1320
1321 line = builder.toString().trim();
1322
1323 ParagraphType type = ParagraphType.NORMAL;
1324 if (space) {
1325 type = ParagraphType.BLANK;
1326 } else if (brk) {
1327 type = ParagraphType.BREAK;
1328 } else if (quote) {
1329 type = ParagraphType.QUOTE;
1330 }
1331
793f1071 1332 return new Paragraph(type, line, words);
08fe2e33
NR
1333 }
1334
1335 /**
a4143cd7 1336 * Remove the HTML from the input <b>if</b> {@link BasicSupport#isHtml()} is
08fe2e33
NR
1337 * true.
1338 *
1339 * @param input
1340 * the input
1341 *
1342 * @return the no html version if needed
1343 */
1344 private String ifUnhtml(String input) {
1345 if (isHtml() && input != null) {
1346 return StringUtils.unhtml(input);
1347 }
1348
1349 return input;
1350 }
1351
1352 /**
1353 * Return a {@link BasicSupport} implementation supporting the given
1354 * resource if possible.
1355 *
1356 * @param url
1357 * the story resource
1358 *
1359 * @return an implementation that supports it, or NULL
1360 */
1361 public static BasicSupport getSupport(URL url) {
1362 if (url == null) {
1363 return null;
1364 }
1365
1366 // TEXT and INFO_TEXT always support files (not URLs though)
1367 for (SupportType type : SupportType.values()) {
1368 if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1369 BasicSupport support = getSupport(type);
1370 if (support != null && support.supports(url)) {
1371 return support;
1372 }
1373 }
1374 }
1375
373da363
NR
1376 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
1377 SupportType.TEXT }) {
08fe2e33
NR
1378 BasicSupport support = getSupport(type);
1379 if (support != null && support.supports(url)) {
1380 return support;
1381 }
1382 }
1383
1384 return null;
1385 }
1386
1387 /**
1388 * Return a {@link BasicSupport} implementation supporting the given type.
1389 *
1390 * @param type
1391 * the type
1392 *
1393 * @return an implementation that supports it, or NULL
1394 */
1395 public static BasicSupport getSupport(SupportType type) {
1396 switch (type) {
1397 case EPUB:
1398 return new Epub().setType(type);
1399 case INFO_TEXT:
1400 return new InfoText().setType(type);
1401 case FIMFICTION:
1402 return new Fimfiction().setType(type);
1403 case FANFICTION:
1404 return new Fanfiction().setType(type);
1405 case TEXT:
1406 return new Text().setType(type);
1407 case MANGAFOX:
1408 return new MangaFox().setType(type);
1409 case E621:
1410 return new E621().setType(type);
a4143cd7
NR
1411 case YIFFSTAR:
1412 return new YiffStar().setType(type);
f0608ab1
NR
1413 case E_HENTAI:
1414 return new EHentai().setType(type);
08fe2e33
NR
1415 case CBZ:
1416 return new Cbz().setType(type);
373da363
NR
1417 case HTML:
1418 return new Html().setType(type);
08fe2e33
NR
1419 }
1420
1421 return null;
1422 }
68686a37
NR
1423
1424 /**
1425 * Return the first line from the given input which correspond to the given
1426 * selectors.
1427 *
1428 * @param in
1429 * the input
1430 * @param needle
1431 * a string that must be found inside the target line (also
1432 * supports "^" at start to say "only if it starts with" the
1433 * needle)
1434 * @param relativeLine
1435 * the line to return based upon the target line position (-1 =
1436 * the line before, 0 = the target line...)
1437 *
1438 * @return the line
1439 */
1440 static String getLine(InputStream in, String needle, int relativeLine) {
1441 return getLine(in, needle, relativeLine, true);
1442 }
1443
1444 /**
1445 * Return a line from the given input which correspond to the given
1446 * selectors.
1447 *
1448 * @param in
1449 * the input
1450 * @param needle
1451 * a string that must be found inside the target line (also
1452 * supports "^" at start to say "only if it starts with" the
1453 * needle)
1454 * @param relativeLine
1455 * the line to return based upon the target line position (-1 =
1456 * the line before, 0 = the target line...)
1457 * @param first
1458 * takes the first result (as opposed to the last one, which will
1459 * also always spend the input)
1460 *
1461 * @return the line
1462 */
1463 static String getLine(InputStream in, String needle, int relativeLine,
1464 boolean first) {
1465 String rep = null;
1466
1467 try {
1468 in.reset();
1469 } catch (IOException e) {
1470 Instance.syserr(e);
1471 }
1472
1473 List<String> lines = new ArrayList<String>();
1474 @SuppressWarnings("resource")
1475 Scanner scan = new Scanner(in, "UTF-8");
1476 int index = -1;
1477 scan.useDelimiter("\\n");
1478 while (scan.hasNext()) {
1479 lines.add(scan.next());
1480
1481 if (index == -1) {
1482 if (needle.startsWith("^")) {
1483 if (lines.get(lines.size() - 1).startsWith(
1484 needle.substring(1))) {
1485 index = lines.size() - 1;
1486 }
1487
1488 } else {
1489 if (lines.get(lines.size() - 1).contains(needle)) {
1490 index = lines.size() - 1;
1491 }
1492 }
1493 }
1494
1495 if (index >= 0 && index + relativeLine < lines.size()) {
1496 rep = lines.get(index + relativeLine);
1497 if (first) {
1498 break;
1499 }
1500 }
1501 }
1502
1503 return rep;
1504 }
f0608ab1
NR
1505
1506 /**
1507 * Return the text between the key and the endKey (and optional subKey can
1508 * be passed, in this case we will look for the key first, then take the
1509 * text between the subKey and the endKey).
1510 * <p>
1511 * Will only match the first line with the given key if more than one are
1512 * possible. Which also means that if the subKey or endKey is not found on
1513 * that line, NULL will be returned.
1514 *
1515 * @param in
1516 * the input
1517 * @param key
27dc7179
NR
1518 * the key to match (also supports "^" at start to say
1519 * "only if it starts with" the key)
f0608ab1
NR
1520 * @param subKey
1521 * the sub key or NULL if none
1522 * @param endKey
1523 * the end key or NULL for "up to the end"
1524 * @return the text or NULL if not found
1525 */
1526 static String getKeyLine(InputStream in, String key, String subKey,
1527 String endKey) {
1528 String result = null;
1529
1530 String line = getLine(in, key, 0);
1531 if (line != null && line.contains(key)) {
1532 line = line.substring(line.indexOf(key) + key.length());
1533 if (subKey == null || subKey.isEmpty() || line.contains(subKey)) {
1534 if (subKey != null) {
1535 line = line.substring(line.indexOf(subKey)
1536 + subKey.length());
1537 }
1538 if (endKey == null || line.contains(endKey)) {
1539 if (endKey != null) {
1540 line = line.substring(0, line.indexOf(endKey));
1541 result = line;
1542 }
1543 }
1544 }
1545 }
1546
1547 return result;
1548 }
08fe2e33 1549}