Add more warnings source to 1.6) and fix warnings
[fanfix.git] / src / be / nikiroo / fanfix / supported / BasicSupport.java
CommitLineData
08fe2e33
NR
1package be.nikiroo.fanfix.supported;
2
68686a37 3import java.awt.image.BufferedImage;
68e370a4 4import java.io.BufferedReader;
08fe2e33
NR
5import java.io.ByteArrayInputStream;
6import java.io.File;
7import java.io.IOException;
8import java.io.InputStream;
68e370a4 9import java.io.InputStreamReader;
08fe2e33
NR
10import java.net.MalformedURLException;
11import java.net.URL;
08fe2e33 12import java.util.ArrayList;
793f1071 13import java.util.Date;
08fe2e33
NR
14import java.util.HashMap;
15import java.util.List;
16import java.util.Map;
17import java.util.Map.Entry;
18import java.util.Scanner;
19
20import be.nikiroo.fanfix.Instance;
21import be.nikiroo.fanfix.bundles.Config;
22import be.nikiroo.fanfix.bundles.StringId;
23import be.nikiroo.fanfix.data.Chapter;
24import be.nikiroo.fanfix.data.MetaData;
25import be.nikiroo.fanfix.data.Paragraph;
08fe2e33 26import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
9252c65e 27import be.nikiroo.fanfix.data.Story;
326093dc 28import be.nikiroo.utils.ImageUtils;
3b2b638f 29import be.nikiroo.utils.Progress;
08fe2e33
NR
30import be.nikiroo.utils.StringUtils;
31
32/**
33 * This class is the base class used by the other support classes. It can be
34 * used outside of this package, and have static method that you can use to get
35 * access to the correct support class.
36 * <p>
37 * It will be used with 'resources' (usually web pages or files).
38 *
39 * @author niki
40 */
41public abstract class BasicSupport {
42 /**
43 * The supported input types for which we can get a {@link BasicSupport}
44 * object.
45 *
46 * @author niki
47 */
48 public enum SupportType {
49 /** EPUB files created with this program */
50 EPUB,
51 /** Pure text file with some rules */
52 TEXT,
53 /** TEXT but with associated .info file */
54 INFO_TEXT,
55 /** My Little Pony fanfictions */
56 FIMFICTION,
57 /** Fanfictions from a lot of different universes */
58 FANFICTION,
59 /** Website with lots of Mangas */
60 MANGAFOX,
61 /** Furry website with comics support */
62 E621,
a4143cd7
NR
63 /** Furry website with stories */
64 YIFFSTAR,
f0608ab1
NR
65 /** Comics and images groups, mostly but not only NSFW */
66 E_HENTAI,
08fe2e33 67 /** CBZ files */
373da363
NR
68 CBZ,
69 /** HTML files */
70 HTML;
08fe2e33
NR
71
72 /**
73 * A description of this support type (more information than the
74 * {@link BasicSupport#getSourceName()}).
75 *
76 * @return the description
77 */
78 public String getDesc() {
79 String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
80 this.name());
81
82 if (desc == null) {
83 desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
84 }
85
86 return desc;
87 }
88
89 /**
90 * The name of this support type (a short version).
91 *
92 * @return the name
93 */
94 public String getSourceName() {
95 BasicSupport support = BasicSupport.getSupport(this);
96 if (support != null) {
97 return support.getSourceName();
98 }
99
100 return null;
101 }
102
103 @Override
104 public String toString() {
105 return super.toString().toLowerCase();
106 }
107
108 /**
0efd25e3
NR
109 * Call {@link SupportType#valueOf(String)} after conversion to upper
110 * case.
08fe2e33
NR
111 *
112 * @param typeName
113 * the possible type name
114 *
115 * @return NULL or the type
116 */
117 public static SupportType valueOfUC(String typeName) {
118 return SupportType.valueOf(typeName == null ? null : typeName
119 .toUpperCase());
120 }
121
122 /**
0efd25e3
NR
123 * Call {@link SupportType#valueOf(String)} after conversion to upper
124 * case but return NULL for NULL instead of raising exception.
08fe2e33
NR
125 *
126 * @param typeName
127 * the possible type name
128 *
129 * @return NULL or the type
130 */
131 public static SupportType valueOfNullOkUC(String typeName) {
132 if (typeName == null) {
133 return null;
134 }
135
136 return SupportType.valueOfUC(typeName);
137 }
138
139 /**
0efd25e3
NR
140 * Call {@link SupportType#valueOf(String)} after conversion to upper
141 * case but return NULL in case of error instead of raising an
142 * exception.
08fe2e33
NR
143 *
144 * @param typeName
145 * the possible type name
146 *
147 * @return NULL or the type
148 */
149 public static SupportType valueOfAllOkUC(String typeName) {
150 try {
151 return SupportType.valueOfUC(typeName);
152 } catch (Exception e) {
153 return null;
154 }
155 }
156 }
157
08fe2e33
NR
158 private InputStream in;
159 private SupportType type;
22848428 160 private URL currentReferer; // with only one 'r', as in 'HTTP'...
08fe2e33
NR
161
162 // quote chars
e8eeea0a 163 private char openQuote = Instance.getTrans().getCharacter(
08fe2e33 164 StringId.OPEN_SINGLE_QUOTE);
e8eeea0a 165 private char closeQuote = Instance.getTrans().getCharacter(
08fe2e33 166 StringId.CLOSE_SINGLE_QUOTE);
e8eeea0a 167 private char openDoubleQuote = Instance.getTrans().getCharacter(
08fe2e33 168 StringId.OPEN_DOUBLE_QUOTE);
e8eeea0a 169 private char closeDoubleQuote = Instance.getTrans().getCharacter(
08fe2e33
NR
170 StringId.CLOSE_DOUBLE_QUOTE);
171
172 /**
173 * The name of this support class.
174 *
175 * @return the name
176 */
177 protected abstract String getSourceName();
178
179 /**
180 * Check if the given resource is supported by this {@link BasicSupport}.
181 *
182 * @param url
183 * the resource to check for
184 *
185 * @return TRUE if it is
186 */
187 protected abstract boolean supports(URL url);
188
189 /**
190 * Return TRUE if the support will return HTML encoded content values for
191 * the chapters content.
192 *
193 * @return TRUE for HTML
194 */
195 protected abstract boolean isHtml();
196
0efd25e3
NR
197 /**
198 * Return the {@link MetaData} of this story.
199 *
200 * @param source
201 * the source of the story
202 * @param in
203 * the input (the main resource)
204 *
205 * @return the associated {@link MetaData}
206 *
207 * @throws IOException
208 * in case of I/O error
209 */
68686a37 210 protected abstract MetaData getMeta(URL source, InputStream in)
08fe2e33
NR
211 throws IOException;
212
213 /**
214 * Return the story description.
215 *
216 * @param source
217 * the source of the story
218 * @param in
219 * the input (the main resource)
220 *
221 * @return the description
222 *
223 * @throws IOException
224 * in case of I/O error
225 */
226 protected abstract String getDesc(URL source, InputStream in)
227 throws IOException;
228
08fe2e33
NR
229 /**
230 * Return the list of chapters (name and resource).
231 *
232 * @param source
233 * the source of the story
234 * @param in
235 * the input (the main resource)
ed08c171
NR
236 * @param pg
237 * the optional progress reporter
08fe2e33
NR
238 *
239 * @return the chapters
240 *
241 * @throws IOException
242 * in case of I/O error
243 */
244 protected abstract List<Entry<String, URL>> getChapters(URL source,
ed08c171 245 InputStream in, Progress pg) throws IOException;
08fe2e33
NR
246
247 /**
248 * Return the content of the chapter (possibly HTML encoded, if
249 * {@link BasicSupport#isHtml()} is TRUE).
250 *
251 * @param source
252 * the source of the story
253 * @param in
254 * the input (the main resource)
255 * @param number
256 * the chapter number
ed08c171
NR
257 * @param pg
258 * the optional progress reporter
08fe2e33
NR
259 *
260 * @return the content
261 *
262 * @throws IOException
263 * in case of I/O error
264 */
265 protected abstract String getChapterContent(URL source, InputStream in,
ed08c171 266 int number, Progress pg) throws IOException;
08fe2e33 267
6e06d2cc
NR
268 /**
269 * Log into the support (can be a no-op depending upon the support).
270 *
271 * @throws IOException
272 * in case of I/O error
273 */
274 public void login() throws IOException {
275
276 }
277
08fe2e33
NR
278 /**
279 * Return the list of cookies (values included) that must be used to
280 * correctly fetch the resources.
281 * <p>
282 * You are expected to call the super method implementation if you override
283 * it.
284 *
285 * @return the cookies
6e06d2cc
NR
286 *
287 * @throws IOException
288 * in case of I/O error
08fe2e33 289 */
6e06d2cc 290 public Map<String, String> getCookies() throws IOException {
08fe2e33
NR
291 return new HashMap<String, String>();
292 }
293
a4143cd7
NR
294 /**
295 * Return the canonical form of the main {@link URL}.
296 *
297 * @param source
298 * the source {@link URL}
299 *
300 * @return the canonical form of this {@link URL}
301 *
302 * @throws IOException
303 * in case of I/O error
304 */
305 public URL getCanonicalUrl(URL source) throws IOException {
306 return source;
307 }
308
08fe2e33
NR
309 /**
310 * Process the given story resource into a partially filled {@link Story}
311 * object containing the name and metadata, except for the description.
312 *
313 * @param url
314 * the story resource
315 *
316 * @return the {@link Story}
317 *
318 * @throws IOException
319 * in case of I/O error
320 */
321 public Story processMeta(URL url) throws IOException {
ed08c171 322 return processMeta(url, true, false, null);
08fe2e33
NR
323 }
324
325 /**
326 * Process the given story resource into a partially filled {@link Story}
327 * object containing the name and metadata.
328 *
329 * @param url
330 * the story resource
08fe2e33
NR
331 * @param close
332 * close "this" and "in" when done
0efd25e3
NR
333 * @param getDesc
334 * retrieve the description of the story, or not
ed08c171
NR
335 * @param pg
336 * the optional progress reporter
08fe2e33
NR
337 *
338 * @return the {@link Story}
339 *
340 * @throws IOException
341 * in case of I/O error
342 */
ed08c171
NR
343 protected Story processMeta(URL url, boolean close, boolean getDesc,
344 Progress pg) throws IOException {
345 if (pg == null) {
346 pg = new Progress();
347 } else {
348 pg.setMinMax(0, 100);
349 }
350
6e06d2cc 351 login();
ed08c171 352 pg.setProgress(10);
6e06d2cc 353
a4143cd7
NR
354 url = getCanonicalUrl(url);
355
356 setCurrentReferer(url);
357
373da363 358 in = openInput(url);
08fe2e33
NR
359 if (in == null) {
360 return null;
361 }
362
363 try {
68686a37 364 preprocess(url, getInput());
ed08c171 365 pg.setProgress(30);
08fe2e33
NR
366
367 Story story = new Story();
68686a37 368 MetaData meta = getMeta(url, getInput());
793f1071
NR
369 if (meta.getCreationDate() == null
370 || meta.getCreationDate().isEmpty()) {
371 meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
372 }
68686a37
NR
373 story.setMeta(meta);
374
ed08c171
NR
375 pg.setProgress(50);
376
211f7ddb 377 if (meta.getCover() == null) {
68686a37
NR
378 meta.setCover(getDefaultCover(meta.getSubject()));
379 }
08fe2e33 380
ed08c171
NR
381 pg.setProgress(60);
382
08fe2e33
NR
383 if (getDesc) {
384 String descChapterName = Instance.getTrans().getString(
385 StringId.DESCRIPTION);
386 story.getMeta().setResume(
387 makeChapter(url, 0, descChapterName,
ed08c171 388 getDesc(url, getInput()), null));
08fe2e33
NR
389 }
390
ed08c171 391 pg.setProgress(100);
08fe2e33
NR
392 return story;
393 } finally {
394 if (close) {
395 try {
396 close();
397 } catch (IOException e) {
398 Instance.syserr(e);
399 }
400
401 if (in != null) {
402 in.close();
403 }
404 }
a4143cd7
NR
405
406 setCurrentReferer(null);
08fe2e33
NR
407 }
408 }
409
410 /**
411 * Process the given story resource into a fully filled {@link Story}
412 * object.
413 *
414 * @param url
415 * the story resource
92fb0719
NR
416 * @param pg
417 * the optional progress reporter
08fe2e33
NR
418 *
419 * @return the {@link Story}
420 *
421 * @throws IOException
422 * in case of I/O error
423 */
92fb0719
NR
424 public Story process(URL url, Progress pg) throws IOException {
425 if (pg == null) {
426 pg = new Progress();
427 } else {
428 pg.setMinMax(0, 100);
429 }
430
a4143cd7 431 url = getCanonicalUrl(url);
92fb0719 432 pg.setProgress(1);
08fe2e33 433 try {
ed08c171
NR
434 Progress pgMeta = new Progress();
435 pg.addProgress(pgMeta, 10);
436 Story story = processMeta(url, false, true, pgMeta);
437 if (!pgMeta.isDone()) {
438 pgMeta.setProgress(pgMeta.getMax()); // 10%
439 }
440
08fe2e33 441 if (story == null) {
ed08c171 442 pg.setProgress(90);
08fe2e33
NR
443 return null;
444 }
445
754a5bc2
NR
446 pg.setName("Retrieving " + story.getMeta().getTitle());
447
a4143cd7
NR
448 setCurrentReferer(url);
449
ed08c171
NR
450 Progress pgGetChapters = new Progress();
451 pg.addProgress(pgGetChapters, 10);
08fe2e33 452 story.setChapters(new ArrayList<Chapter>());
ed08c171
NR
453 List<Entry<String, URL>> chapters = getChapters(url, getInput(),
454 pgGetChapters);
455 if (!pgGetChapters.isDone()) {
456 pgGetChapters.setProgress(pgGetChapters.getMax()); // 20%
457 }
08fe2e33 458
08fe2e33 459 if (chapters != null) {
ed08c171
NR
460 Progress pgChaps = new Progress("Extracting chapters", 0,
461 chapters.size() * 300);
92fb0719
NR
462 pg.addProgress(pgChaps, 80);
463
793f1071 464 long words = 0;
ed08c171 465 int i = 1;
08fe2e33 466 for (Entry<String, URL> chap : chapters) {
ed08c171 467 pgChaps.setName("Extracting chapter " + i);
08fe2e33
NR
468 setCurrentReferer(chap.getValue());
469 InputStream chapIn = Instance.getCache().open(
470 chap.getValue(), this, true);
ed08c171 471 pgChaps.setProgress(i * 100);
08fe2e33 472 try {
ed08c171
NR
473 Progress pgGetChapterContent = new Progress();
474 Progress pgMakeChapter = new Progress();
475 pgChaps.addProgress(pgGetChapterContent, 100);
476 pgChaps.addProgress(pgMakeChapter, 100);
477
478 String content = getChapterContent(url, chapIn, i,
479 pgGetChapterContent);
480 if (!pgGetChapterContent.isDone()) {
481 pgGetChapterContent.setProgress(pgGetChapterContent
482 .getMax());
483 }
484
793f1071 485 Chapter cc = makeChapter(url, i, chap.getKey(),
ed08c171
NR
486 content, pgMakeChapter);
487 if (!pgMakeChapter.isDone()) {
488 pgMakeChapter.setProgress(pgMakeChapter.getMax());
489 }
490
793f1071
NR
491 words += cc.getWords();
492 story.getChapters().add(cc);
493 if (story.getMeta() != null) {
494 story.getMeta().setWords(words);
495 }
08fe2e33
NR
496 } finally {
497 chapIn.close();
498 }
a6395bef 499
ed08c171 500 i++;
08fe2e33 501 }
ed08c171
NR
502
503 pgChaps.setName("Extracting chapters");
92fb0719 504 } else {
ed08c171 505 pg.setProgress(80);
08fe2e33
NR
506 }
507
508 return story;
509
510 } finally {
511 try {
512 close();
513 } catch (IOException e) {
514 Instance.syserr(e);
515 }
516
517 if (in != null) {
518 in.close();
519 }
520
a4143cd7 521 setCurrentReferer(null);
08fe2e33
NR
522 }
523 }
524
525 /**
a4143cd7 526 * The support type.
08fe2e33
NR
527 *
528 * @return the type
529 */
530 public SupportType getType() {
531 return type;
532 }
533
534 /**
535 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
536 * the current {@link URL} we work on.
537 *
538 * @return the referer
539 */
540 public URL getCurrentReferer() {
541 return currentReferer;
542 }
543
544 /**
545 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
546 * the current {@link URL} we work on.
547 *
548 * @param currentReferer
549 * the new referer
550 */
551 protected void setCurrentReferer(URL currentReferer) {
552 this.currentReferer = currentReferer;
553 }
554
555 /**
556 * The support type.
557 *
558 * @param type
559 * the new type
560 *
561 * @return this
562 */
563 protected BasicSupport setType(SupportType type) {
564 this.type = type;
565 return this;
566 }
567
568 /**
68686a37 569 * Prepare the support if needed before processing.
08fe2e33
NR
570 *
571 * @param source
572 * the source of the story
573 * @param in
574 * the input (the main resource)
575 *
08fe2e33
NR
576 * @throws IOException
577 * on I/O error
578 */
211f7ddb 579 @SuppressWarnings("unused")
68686a37 580 protected void preprocess(URL source, InputStream in) throws IOException {
08fe2e33
NR
581 }
582
583 /**
584 * Now that we have processed the {@link Story}, close the resources if any.
585 *
586 * @throws IOException
587 * on I/O error
588 */
589 protected void close() throws IOException {
590 }
591
592 /**
593 * Create a {@link Chapter} object from the given information, formatting
594 * the content as it should be.
595 *
0efd25e3
NR
596 * @param source
597 * the source of the story
08fe2e33
NR
598 * @param number
599 * the chapter number
600 * @param name
601 * the chapter name
602 * @param content
603 * the chapter content
ed08c171
NR
604 * @param pg
605 * the optional progress reporter
08fe2e33
NR
606 *
607 * @return the {@link Chapter}
608 *
609 * @throws IOException
610 * in case of I/O error
611 */
612 protected Chapter makeChapter(URL source, int number, String name,
ed08c171 613 String content, Progress pg) throws IOException {
08fe2e33 614 // Chapter name: process it correctly, then remove the possible
f60df2f1
NR
615 // redundant "Chapter x: " in front of it, or "-" (as in
616 // "Chapter 5: - Fun!" after the ": " was automatically added)
08fe2e33
NR
617 String chapterName = processPara(name).getContent().trim();
618 for (String lang : Instance.getConfig().getString(Config.CHAPTER)
619 .split(",")) {
620 String chapterWord = Instance.getConfig().getStringX(
621 Config.CHAPTER, lang);
622 if (chapterName.startsWith(chapterWord)) {
623 chapterName = chapterName.substring(chapterWord.length())
624 .trim();
625 break;
626 }
627 }
628
629 if (chapterName.startsWith(Integer.toString(number))) {
630 chapterName = chapterName.substring(
631 Integer.toString(number).length()).trim();
632 }
633
f60df2f1 634 while (chapterName.startsWith(":") || chapterName.startsWith("-")) {
08fe2e33
NR
635 chapterName = chapterName.substring(1).trim();
636 }
637 //
638
639 Chapter chap = new Chapter(number, chapterName);
640
68e370a4 641 if (content != null) {
ed08c171 642 List<Paragraph> paras = makeParagraphs(source, content, pg);
793f1071
NR
643 long words = 0;
644 for (Paragraph para : paras) {
645 words += para.getWords();
646 }
647 chap.setParagraphs(paras);
648 chap.setWords(words);
08fe2e33
NR
649 }
650
68e370a4
NR
651 return chap;
652
653 }
654
655 /**
656 * Convert the given content into {@link Paragraph}s.
657 *
658 * @param source
659 * the source URL of the story
660 * @param content
661 * the textual content
ed08c171
NR
662 * @param pg
663 * the optional progress reporter
68e370a4
NR
664 *
665 * @return the {@link Paragraph}s
666 *
667 * @throws IOException
668 * in case of I/O error
669 */
ed08c171
NR
670 protected List<Paragraph> makeParagraphs(URL source, String content,
671 Progress pg) throws IOException {
672 if (pg == null) {
673 pg = new Progress();
674 }
675
08fe2e33
NR
676 if (isHtml()) {
677 // Special <HR> processing:
678 content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
d5a7153c 679 "<br/>* * *<br/>");
08fe2e33
NR
680 }
681
68e370a4 682 List<Paragraph> paras = new ArrayList<Paragraph>();
08fe2e33 683
d5a7153c
NR
684 if (content != null && !content.trim().isEmpty()) {
685 if (isHtml()) {
ed08c171
NR
686 String[] tab = content.split("(<p>|</p>|<br>|<br/>)");
687 pg.setMinMax(0, tab.length);
688 int i = 1;
689 for (String line : tab) {
690 if (line.startsWith("[") && line.endsWith("]")) {
691 pg.setName("Extracting image " + i);
692 }
d5a7153c 693 paras.add(makeParagraph(source, line.trim()));
ed08c171 694 pg.setProgress(i++);
d5a7153c 695 }
ed08c171 696 pg.setName(null);
d5a7153c 697 } else {
ed08c171 698 List<String> lines = new ArrayList<String>();
d5a7153c
NR
699 BufferedReader buff = null;
700 try {
701 buff = new BufferedReader(
702 new InputStreamReader(new ByteArrayInputStream(
703 content.getBytes("UTF-8")), "UTF-8"));
704 for (String line = buff.readLine(); line != null; line = buff
705 .readLine()) {
ed08c171 706 lines.add(line.trim());
68e370a4 707 }
d5a7153c
NR
708 } finally {
709 if (buff != null) {
710 buff.close();
68e370a4 711 }
08fe2e33 712 }
ed08c171
NR
713
714 pg.setMinMax(0, lines.size());
715 int i = 0;
716 for (String line : lines) {
717 if (line.startsWith("[") && line.endsWith("]")) {
718 pg.setName("Extracting image " + i);
719 }
720 paras.add(makeParagraph(source, line));
721 pg.setProgress(i++);
722 }
723 pg.setName(null);
08fe2e33
NR
724 }
725
d5a7153c
NR
726 // Check quotes for "bad" format
727 List<Paragraph> newParas = new ArrayList<Paragraph>();
728 for (Paragraph para : paras) {
729 newParas.addAll(requotify(para));
730 }
731 paras = newParas;
08fe2e33 732
d5a7153c
NR
733 // Remove double blanks/brks
734 fixBlanksBreaks(paras);
735 }
08fe2e33 736
68e370a4
NR
737 return paras;
738 }
08fe2e33 739
d5a7153c
NR
740 /**
741 * Convert the given line into a single {@link Paragraph}.
742 *
743 * @param source
744 * the source URL of the story
745 * @param line
746 * the textual content of the paragraph
747 *
748 * @return the {@link Paragraph}
749 */
750 private Paragraph makeParagraph(URL source, String line) {
751 URL image = null;
752 if (line.startsWith("[") && line.endsWith("]")) {
753 image = getImageUrl(this, source,
754 line.substring(1, line.length() - 1).trim());
755 }
756
757 if (image != null) {
758 return new Paragraph(image);
d5a7153c 759 }
211f7ddb
NR
760
761 return processPara(line);
d5a7153c
NR
762 }
763
68e370a4
NR
764 /**
765 * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
766 * those {@link Paragraph}s.
767 * <p>
768 * The resulting list will not contain a starting or trailing blank/break
769 * nor 2 blanks or breaks following each other.
770 *
771 * @param paras
772 * the list of {@link Paragraph}s to fix
773 */
774 protected void fixBlanksBreaks(List<Paragraph> paras) {
775 boolean space = false;
776 boolean brk = true;
777 for (int i = 0; i < paras.size(); i++) {
778 Paragraph para = paras.get(i);
779 boolean thisSpace = para.getType() == ParagraphType.BLANK;
780 boolean thisBrk = para.getType() == ParagraphType.BREAK;
781
782 if (i > 0 && space && thisBrk) {
783 paras.remove(i - 1);
784 i--;
785 } else if ((space || brk) && (thisSpace || thisBrk)) {
786 paras.remove(i);
787 i--;
08fe2e33
NR
788 }
789
68e370a4
NR
790 space = thisSpace;
791 brk = thisBrk;
792 }
08fe2e33 793
68e370a4
NR
794 // Remove blank/brk at start
795 if (paras.size() > 0
796 && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
797 0).getType() == ParagraphType.BREAK)) {
798 paras.remove(0);
799 }
800
801 // Remove blank/brk at end
802 int last = paras.size() - 1;
803 if (paras.size() > 0
804 && (paras.get(last).getType() == ParagraphType.BLANK || paras
805 .get(last).getType() == ParagraphType.BREAK)) {
806 paras.remove(last);
08fe2e33
NR
807 }
808 }
809
68e370a4
NR
810 /**
811 * Get the default cover related to this subject (see <tt>.info</tt> files).
812 *
813 * @param subject
814 * the subject
815 *
816 * @return the cover if any, or NULL
817 */
68686a37
NR
818 static BufferedImage getDefaultCover(String subject) {
819 if (subject != null && !subject.isEmpty()
820 && Instance.getCoverDir() != null) {
821 try {
822 File fileCover = new File(Instance.getCoverDir(), subject);
333f0e7b 823 return getImage(null, fileCover.toURI().toURL(), subject);
68686a37
NR
824 } catch (MalformedURLException e) {
825 }
826 }
827
828 return null;
829 }
830
08fe2e33
NR
831 /**
832 * Return the list of supported image extensions.
833 *
a4143cd7
NR
834 * @param emptyAllowed
835 * TRUE to allow an empty extension on first place, which can be
836 * used when you may already have an extension in your input but
837 * are not sure about it
838 *
08fe2e33
NR
839 * @return the extensions
840 */
68686a37 841 static String[] getImageExt(boolean emptyAllowed) {
08fe2e33
NR
842 if (emptyAllowed) {
843 return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
08fe2e33 844 }
211f7ddb
NR
845
846 return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
08fe2e33
NR
847 }
848
a4143cd7
NR
849 /**
850 * Check if the given resource can be a local image or a remote image, then
851 * refresh the cache with it if it is.
852 *
853 * @param source
854 * the story source
855 * @param line
856 * the resource to check
857 *
858 * @return the image if found, or NULL
859 *
860 */
333f0e7b
NR
861 static BufferedImage getImage(BasicSupport support, URL source, String line) {
862 URL url = getImageUrl(support, source, line);
68686a37
NR
863 if (url != null) {
864 InputStream in = null;
865 try {
866 in = Instance.getCache().open(url, getSupport(url), true);
326093dc 867 return ImageUtils.fromStream(in);
68686a37
NR
868 } catch (IOException e) {
869 } finally {
870 if (in != null) {
871 try {
872 in.close();
873 } catch (IOException e) {
874 }
875 }
876 }
877 }
878
879 return null;
880 }
881
08fe2e33
NR
882 /**
883 * Check if the given resource can be a local image or a remote image, then
884 * refresh the cache with it if it is.
885 *
886 * @param source
887 * the story source
888 * @param line
889 * the resource to check
890 *
891 * @return the image URL if found, or NULL
892 *
893 */
333f0e7b 894 static URL getImageUrl(BasicSupport support, URL source, String line) {
08fe2e33
NR
895 URL url = null;
896
68686a37
NR
897 if (line != null) {
898 // try for files
68686a37 899 if (source != null) {
68686a37 900 try {
2ab2e40a
NR
901
902 String relPath = null;
903 String absPath = null;
904 try {
905 String path = new File(source.getFile()).getParent();
906 relPath = new File(new File(path), line.trim())
907 .getAbsolutePath();
908 } catch (Exception e) {
909 // Cannot be converted to path (one possibility to take
910 // into account: absolute path on Windows)
911 }
912 try {
913 absPath = new File(line.trim()).getAbsolutePath();
914 } catch (Exception e) {
915 // Cannot be converted to path (at all)
916 }
917
68686a37 918 for (String ext : getImageExt(true)) {
2ab2e40a
NR
919 if (absPath != null && new File(absPath + ext).exists()) {
920 url = new File(absPath + ext).toURI().toURL();
921 } else if (relPath != null
922 && new File(relPath + ext).exists()) {
923 url = new File(relPath + ext).toURI().toURL();
68686a37 924 }
08fe2e33 925 }
68686a37 926 } catch (Exception e) {
2ab2e40a 927 // Should not happen since we control the correct arguments
08fe2e33 928 }
68686a37 929 }
08fe2e33 930
68686a37
NR
931 if (url == null) {
932 // try for URLs
933 try {
08fe2e33 934 for (String ext : getImageExt(true)) {
68686a37 935 if (Instance.getCache().check(new URL(line + ext))) {
08fe2e33 936 url = new URL(line + ext);
333f0e7b 937 break;
08fe2e33
NR
938 }
939 }
68686a37
NR
940
941 // try out of cache
942 if (url == null) {
943 for (String ext : getImageExt(true)) {
944 try {
945 url = new URL(line + ext);
333f0e7b 946 Instance.getCache().refresh(url, support, true);
68686a37
NR
947 break;
948 } catch (IOException e) {
949 // no image with this ext
950 url = null;
951 }
952 }
953 }
954 } catch (MalformedURLException e) {
955 // Not an url
08fe2e33 956 }
08fe2e33 957 }
08fe2e33 958
68686a37
NR
959 // refresh the cached file
960 if (url != null) {
961 try {
333f0e7b 962 Instance.getCache().refresh(url, support, true);
68686a37
NR
963 } catch (IOException e) {
964 // woops, broken image
965 url = null;
966 }
08fe2e33
NR
967 }
968 }
969
970 return url;
971 }
972
373da363
NR
973 /**
974 * Open the input file that will be used through the support.
975 *
976 * @param source
977 * the source {@link URL}
978 *
979 * @return the {@link InputStream}
980 *
981 * @throws IOException
982 * in case of I/O error
983 */
984 protected InputStream openInput(URL source) throws IOException {
985 return Instance.getCache().open(source, this, false);
986 }
987
a4143cd7
NR
988 /**
989 * Reset the given {@link InputStream} and return it.
990 *
991 * @param in
992 * the {@link InputStream} to reset
993 *
994 * @return the same {@link InputStream} after reset
995 */
68686a37
NR
996 protected InputStream reset(InputStream in) {
997 try {
998 in.reset();
999 } catch (IOException e) {
1000 }
1001 return in;
1002 }
1003
08fe2e33
NR
1004 /**
1005 * Reset then return {@link BasicSupport#in}.
1006 *
1007 * @return {@link BasicSupport#in}
08fe2e33 1008 */
68686a37
NR
1009 protected InputStream getInput() {
1010 return reset(in);
08fe2e33
NR
1011 }
1012
1013 /**
1014 * Fix the author name if it is prefixed with some "by" {@link String}.
1015 *
1016 * @param author
1017 * the author with a possible prefix
1018 *
1019 * @return the author without prefixes
1020 */
68686a37 1021 protected String fixAuthor(String author) {
08fe2e33
NR
1022 if (author != null) {
1023 for (String suffix : new String[] { " ", ":" }) {
1024 for (String byString : Instance.getConfig()
1025 .getString(Config.BYS).split(",")) {
1026 byString += suffix;
1027 if (author.toUpperCase().startsWith(byString.toUpperCase())) {
1028 author = author.substring(byString.length()).trim();
1029 }
1030 }
1031 }
1032
1033 // Special case (without suffix):
1034 if (author.startsWith("©")) {
1035 author = author.substring(1);
1036 }
1037 }
1038
1039 return author;
1040 }
1041
1042 /**
1043 * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
1044 * and requotify them (i.e., separate them into QUOTE paragraphs and other
1045 * paragraphs (quotes or not)).
1046 *
1047 * @param para
a4143cd7 1048 * the paragraph to requotify (not necessarily a quote)
08fe2e33
NR
1049 *
1050 * @return the correctly (or so we hope) quotified paragraphs
1051 */
68e370a4 1052 protected List<Paragraph> requotify(Paragraph para) {
08fe2e33
NR
1053 List<Paragraph> newParas = new ArrayList<Paragraph>();
1054
68686a37
NR
1055 if (para.getType() == ParagraphType.QUOTE
1056 && para.getContent().length() > 2) {
08fe2e33
NR
1057 String line = para.getContent();
1058 boolean singleQ = line.startsWith("" + openQuote);
1059 boolean doubleQ = line.startsWith("" + openDoubleQuote);
1060
b4dc6ab5
NR
1061 // Do not try when more than one quote at a time
1062 // (some stories are not easily readable if we do)
1063 if (singleQ
1064 && line.indexOf(closeQuote, 1) < line
1065 .lastIndexOf(closeQuote)) {
1066 newParas.add(para);
1067 return newParas;
1068 }
1069 if (doubleQ
1070 && line.indexOf(closeDoubleQuote, 1) < line
1071 .lastIndexOf(closeDoubleQuote)) {
1072 newParas.add(para);
1073 return newParas;
1074 }
1075 //
1076
08fe2e33
NR
1077 if (!singleQ && !doubleQ) {
1078 line = openDoubleQuote + line + closeDoubleQuote;
793f1071
NR
1079 newParas.add(new Paragraph(ParagraphType.QUOTE, line, para
1080 .getWords()));
08fe2e33 1081 } else {
a6395bef 1082 char open = singleQ ? openQuote : openDoubleQuote;
08fe2e33 1083 char close = singleQ ? closeQuote : closeDoubleQuote;
a6395bef
NR
1084
1085 int posDot = -1;
1086 boolean inQuote = false;
1087 int i = 0;
1088 for (char car : line.toCharArray()) {
1089 if (car == open) {
1090 inQuote = true;
1091 } else if (car == close) {
1092 inQuote = false;
1093 } else if (car == '.' && !inQuote) {
1094 posDot = i;
1095 break;
1096 }
1097 i++;
08fe2e33
NR
1098 }
1099
1100 if (posDot >= 0) {
1101 String rest = line.substring(posDot + 1).trim();
1102 line = line.substring(0, posDot + 1).trim();
793f1071
NR
1103 long words = 1;
1104 for (char car : line.toCharArray()) {
1105 if (car == ' ') {
1106 words++;
1107 }
1108 }
1109 newParas.add(new Paragraph(ParagraphType.QUOTE, line, words));
68686a37
NR
1110 if (!rest.isEmpty()) {
1111 newParas.addAll(requotify(processPara(rest)));
1112 }
08fe2e33
NR
1113 } else {
1114 newParas.add(para);
1115 }
1116 }
1117 } else {
1118 newParas.add(para);
1119 }
1120
1121 return newParas;
1122 }
1123
1124 /**
1125 * Process a {@link Paragraph} from a raw line of text.
1126 * <p>
1127 * Will also fix quotes and HTML encoding if needed.
1128 *
1129 * @param line
1130 * the raw line
1131 *
1132 * @return the processed {@link Paragraph}
1133 */
22848428 1134 protected Paragraph processPara(String line) {
08fe2e33
NR
1135 line = ifUnhtml(line).trim();
1136
1137 boolean space = true;
1138 boolean brk = true;
1139 boolean quote = false;
1140 boolean tentativeCloseQuote = false;
1141 char prev = '\0';
1142 int dashCount = 0;
793f1071 1143 long words = 1;
08fe2e33
NR
1144
1145 StringBuilder builder = new StringBuilder();
1146 for (char car : line.toCharArray()) {
1147 if (car != '-') {
1148 if (dashCount > 0) {
1149 // dash, ndash and mdash: - – —
1150 // currently: always use mdash
1151 builder.append(dashCount == 1 ? '-' : '—');
1152 }
1153 dashCount = 0;
1154 }
1155
1156 if (tentativeCloseQuote) {
1157 tentativeCloseQuote = false;
22848428 1158 if (Character.isLetterOrDigit(car)) {
08fe2e33
NR
1159 builder.append("'");
1160 } else {
22848428
NR
1161 // handle double-single quotes as double quotes
1162 if (prev == car) {
1163 builder.append(closeDoubleQuote);
1164 continue;
22848428 1165 }
211f7ddb
NR
1166
1167 builder.append(closeQuote);
08fe2e33
NR
1168 }
1169 }
1170
1171 switch (car) {
1172 case ' ': // note: unbreakable space
1173 case ' ':
1174 case '\t':
1175 case '\n': // just in case
1176 case '\r': // just in case
793f1071
NR
1177 if (builder.length() > 0
1178 && builder.charAt(builder.length() - 1) != ' ') {
1179 words++;
1180 }
08fe2e33
NR
1181 builder.append(' ');
1182 break;
1183
1184 case '\'':
1185 if (space || (brk && quote)) {
1186 quote = true;
22848428
NR
1187 // handle double-single quotes as double quotes
1188 if (prev == car) {
1189 builder.deleteCharAt(builder.length() - 1);
1190 builder.append(openDoubleQuote);
1191 } else {
1192 builder.append(openQuote);
1193 }
1194 } else if (prev == ' ' || prev == car) {
1195 // handle double-single quotes as double quotes
1196 if (prev == car) {
1197 builder.deleteCharAt(builder.length() - 1);
1198 builder.append(openDoubleQuote);
1199 } else {
1200 builder.append(openQuote);
1201 }
08fe2e33
NR
1202 } else {
1203 // it is a quote ("I'm off") or a 'quote' ("This
1204 // 'good' restaurant"...)
1205 tentativeCloseQuote = true;
1206 }
1207 break;
1208
1209 case '"':
1210 if (space || (brk && quote)) {
1211 quote = true;
1212 builder.append(openDoubleQuote);
1213 } else if (prev == ' ') {
1214 builder.append(openDoubleQuote);
1215 } else {
1216 builder.append(closeDoubleQuote);
1217 }
1218 break;
1219
1220 case '-':
1221 if (space) {
1222 quote = true;
1223 } else {
1224 dashCount++;
1225 }
1226 space = false;
1227 break;
1228
1229 case '*':
1230 case '~':
1231 case '/':
1232 case '\\':
1233 case '<':
1234 case '>':
1235 case '=':
1236 case '+':
1237 case '_':
1238 case '–':
1239 case '—':
1240 space = false;
1241 builder.append(car);
1242 break;
1243
1244 case '‘':
1245 case '`':
1246 case '‹':
1247 case '﹁':
1248 case '〈':
1249 case '「':
1250 if (space || (brk && quote)) {
1251 quote = true;
1252 builder.append(openQuote);
1253 } else {
22848428
NR
1254 // handle double-single quotes as double quotes
1255 if (prev == car) {
1256 builder.deleteCharAt(builder.length() - 1);
1257 builder.append(openDoubleQuote);
1258 } else {
1259 builder.append(openQuote);
1260 }
08fe2e33
NR
1261 }
1262 space = false;
1263 brk = false;
1264 break;
1265
1266 case '’':
1267 case '›':
1268 case '﹂':
1269 case '〉':
1270 case '」':
1271 space = false;
1272 brk = false;
22848428
NR
1273 // handle double-single quotes as double quotes
1274 if (prev == car) {
1275 builder.deleteCharAt(builder.length() - 1);
1276 builder.append(closeDoubleQuote);
1277 } else {
1278 builder.append(closeQuote);
1279 }
08fe2e33
NR
1280 break;
1281
1282 case '«':
1283 case '“':
1284 case '﹃':
1285 case '《':
1286 case '『':
1287 if (space || (brk && quote)) {
1288 quote = true;
1289 builder.append(openDoubleQuote);
1290 } else {
1291 builder.append(openDoubleQuote);
1292 }
1293 space = false;
1294 brk = false;
1295 break;
1296
1297 case '»':
1298 case '”':
1299 case '﹄':
1300 case '》':
1301 case '』':
1302 space = false;
1303 brk = false;
1304 builder.append(closeDoubleQuote);
1305 break;
1306
1307 default:
1308 space = false;
1309 brk = false;
1310 builder.append(car);
1311 break;
1312 }
1313
1314 prev = car;
1315 }
1316
1317 if (tentativeCloseQuote) {
1318 tentativeCloseQuote = false;
1319 builder.append(closeQuote);
1320 }
1321
1322 line = builder.toString().trim();
1323
1324 ParagraphType type = ParagraphType.NORMAL;
1325 if (space) {
1326 type = ParagraphType.BLANK;
1327 } else if (brk) {
1328 type = ParagraphType.BREAK;
1329 } else if (quote) {
1330 type = ParagraphType.QUOTE;
1331 }
1332
793f1071 1333 return new Paragraph(type, line, words);
08fe2e33
NR
1334 }
1335
1336 /**
a4143cd7 1337 * Remove the HTML from the input <b>if</b> {@link BasicSupport#isHtml()} is
08fe2e33
NR
1338 * true.
1339 *
1340 * @param input
1341 * the input
1342 *
1343 * @return the no html version if needed
1344 */
1345 private String ifUnhtml(String input) {
1346 if (isHtml() && input != null) {
1347 return StringUtils.unhtml(input);
1348 }
1349
1350 return input;
1351 }
1352
1353 /**
1354 * Return a {@link BasicSupport} implementation supporting the given
1355 * resource if possible.
1356 *
1357 * @param url
1358 * the story resource
1359 *
1360 * @return an implementation that supports it, or NULL
1361 */
1362 public static BasicSupport getSupport(URL url) {
1363 if (url == null) {
1364 return null;
1365 }
1366
1367 // TEXT and INFO_TEXT always support files (not URLs though)
1368 for (SupportType type : SupportType.values()) {
1369 if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
1370 BasicSupport support = getSupport(type);
1371 if (support != null && support.supports(url)) {
1372 return support;
1373 }
1374 }
1375 }
1376
373da363
NR
1377 for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
1378 SupportType.TEXT }) {
08fe2e33
NR
1379 BasicSupport support = getSupport(type);
1380 if (support != null && support.supports(url)) {
1381 return support;
1382 }
1383 }
1384
1385 return null;
1386 }
1387
1388 /**
1389 * Return a {@link BasicSupport} implementation supporting the given type.
1390 *
1391 * @param type
1392 * the type
1393 *
1394 * @return an implementation that supports it, or NULL
1395 */
1396 public static BasicSupport getSupport(SupportType type) {
1397 switch (type) {
1398 case EPUB:
1399 return new Epub().setType(type);
1400 case INFO_TEXT:
1401 return new InfoText().setType(type);
1402 case FIMFICTION:
1403 return new Fimfiction().setType(type);
1404 case FANFICTION:
1405 return new Fanfiction().setType(type);
1406 case TEXT:
1407 return new Text().setType(type);
1408 case MANGAFOX:
1409 return new MangaFox().setType(type);
1410 case E621:
1411 return new E621().setType(type);
a4143cd7
NR
1412 case YIFFSTAR:
1413 return new YiffStar().setType(type);
f0608ab1
NR
1414 case E_HENTAI:
1415 return new EHentai().setType(type);
08fe2e33
NR
1416 case CBZ:
1417 return new Cbz().setType(type);
373da363
NR
1418 case HTML:
1419 return new Html().setType(type);
08fe2e33
NR
1420 }
1421
1422 return null;
1423 }
68686a37
NR
1424
1425 /**
1426 * Return the first line from the given input which correspond to the given
1427 * selectors.
1428 *
1429 * @param in
1430 * the input
1431 * @param needle
1432 * a string that must be found inside the target line (also
1433 * supports "^" at start to say "only if it starts with" the
1434 * needle)
1435 * @param relativeLine
1436 * the line to return based upon the target line position (-1 =
1437 * the line before, 0 = the target line...)
1438 *
1439 * @return the line
1440 */
1441 static String getLine(InputStream in, String needle, int relativeLine) {
1442 return getLine(in, needle, relativeLine, true);
1443 }
1444
1445 /**
1446 * Return a line from the given input which correspond to the given
1447 * selectors.
1448 *
1449 * @param in
1450 * the input
1451 * @param needle
1452 * a string that must be found inside the target line (also
1453 * supports "^" at start to say "only if it starts with" the
1454 * needle)
1455 * @param relativeLine
1456 * the line to return based upon the target line position (-1 =
1457 * the line before, 0 = the target line...)
1458 * @param first
1459 * takes the first result (as opposed to the last one, which will
1460 * also always spend the input)
1461 *
1462 * @return the line
1463 */
1464 static String getLine(InputStream in, String needle, int relativeLine,
1465 boolean first) {
1466 String rep = null;
1467
1468 try {
1469 in.reset();
1470 } catch (IOException e) {
1471 Instance.syserr(e);
1472 }
1473
1474 List<String> lines = new ArrayList<String>();
1475 @SuppressWarnings("resource")
1476 Scanner scan = new Scanner(in, "UTF-8");
1477 int index = -1;
1478 scan.useDelimiter("\\n");
1479 while (scan.hasNext()) {
1480 lines.add(scan.next());
1481
1482 if (index == -1) {
1483 if (needle.startsWith("^")) {
1484 if (lines.get(lines.size() - 1).startsWith(
1485 needle.substring(1))) {
1486 index = lines.size() - 1;
1487 }
1488
1489 } else {
1490 if (lines.get(lines.size() - 1).contains(needle)) {
1491 index = lines.size() - 1;
1492 }
1493 }
1494 }
1495
1496 if (index >= 0 && index + relativeLine < lines.size()) {
1497 rep = lines.get(index + relativeLine);
1498 if (first) {
1499 break;
1500 }
1501 }
1502 }
1503
1504 return rep;
1505 }
f0608ab1
NR
1506
1507 /**
1508 * Return the text between the key and the endKey (and optional subKey can
1509 * be passed, in this case we will look for the key first, then take the
1510 * text between the subKey and the endKey).
1511 * <p>
1512 * Will only match the first line with the given key if more than one are
1513 * possible. Which also means that if the subKey or endKey is not found on
1514 * that line, NULL will be returned.
1515 *
1516 * @param in
1517 * the input
1518 * @param key
27dc7179
NR
1519 * the key to match (also supports "^" at start to say
1520 * "only if it starts with" the key)
f0608ab1
NR
1521 * @param subKey
1522 * the sub key or NULL if none
1523 * @param endKey
1524 * the end key or NULL for "up to the end"
1525 * @return the text or NULL if not found
1526 */
1527 static String getKeyLine(InputStream in, String key, String subKey,
1528 String endKey) {
1529 String result = null;
1530
1531 String line = getLine(in, key, 0);
1532 if (line != null && line.contains(key)) {
1533 line = line.substring(line.indexOf(key) + key.length());
1534 if (subKey == null || subKey.isEmpty() || line.contains(subKey)) {
1535 if (subKey != null) {
1536 line = line.substring(line.indexOf(subKey)
1537 + subKey.length());
1538 }
1539 if (endKey == null || line.contains(endKey)) {
1540 if (endKey != null) {
1541 line = line.substring(0, line.indexOf(endKey));
1542 result = line;
1543 }
1544 }
1545 }
1546 }
1547
1548 return result;
1549 }
08fe2e33 1550}