1 package be
.nikiroo
.fanfix
.supported
;
3 import java
.awt
.image
.BufferedImage
;
4 import java
.io
.ByteArrayInputStream
;
6 import java
.io
.IOException
;
7 import java
.io
.InputStream
;
8 import java
.net
.MalformedURLException
;
10 import java
.util
.ArrayList
;
11 import java
.util
.HashMap
;
12 import java
.util
.List
;
14 import java
.util
.Map
.Entry
;
15 import java
.util
.Scanner
;
17 import be
.nikiroo
.fanfix
.Instance
;
18 import be
.nikiroo
.fanfix
.bundles
.Config
;
19 import be
.nikiroo
.fanfix
.bundles
.StringId
;
20 import be
.nikiroo
.fanfix
.data
.Chapter
;
21 import be
.nikiroo
.fanfix
.data
.MetaData
;
22 import be
.nikiroo
.fanfix
.data
.Paragraph
;
23 import be
.nikiroo
.fanfix
.data
.Paragraph
.ParagraphType
;
24 import be
.nikiroo
.fanfix
.data
.Story
;
25 import be
.nikiroo
.utils
.IOUtils
;
26 import be
.nikiroo
.utils
.StringUtils
;
27 import be
.nikiroo
.utils
.ui
.Progress
;
30 * This class is the base class used by the other support classes. It can be
31 * used outside of this package, and has static methods that you can use to get
32 * access to the correct support class.
34 * It will be used with 'resources' (usually web pages or files).
38 public abstract class BasicSupport
{
40 * The supported input types for which we can get a {@link BasicSupport}
45 public enum SupportType
{
46 /** EPUB files created with this program */
48 /** Pure text file with some rules */
50 /** TEXT but with associated .info file */
52 /** My Little Pony fanfictions */
54 /** Fanfictions from a lot of different universes */
56 /** Website with lots of Mangas */
58 /** Furry website with comics support */
64 * A description of this support type (more information than the
65 * {@link BasicSupport#getSourceName()}).
67 * @return the description
69 public String
getDesc() {
70 String desc
= Instance
.getTrans().getStringX(StringId
.INPUT_DESC
,
74 desc
= Instance
.getTrans().getString(StringId
.INPUT_DESC
, this);
81 * The name of this support type (a short version).
85 public String
getSourceName() {
86 BasicSupport support
= BasicSupport
.getSupport(this);
87 if (support
!= null) {
88 return support
.getSourceName();
95 public String
toString() {
96 return super.toString().toLowerCase();
100 * Call {@link SupportType#valueOf(String.toUpperCase())}.
103 * the possible type name
105 * @return NULL or the type
107 public static SupportType
valueOfUC(String typeName
) {
108 return SupportType
.valueOf(typeName
== null ?
null : typeName
113 * Call {@link SupportType#valueOf(String.toUpperCase())} but return
114 * NULL for NULL instead of raising exception.
117 * the possible type name
119 * @return NULL or the type
121 public static SupportType
valueOfNullOkUC(String typeName
) {
122 if (typeName
== null) {
126 return SupportType
.valueOfUC(typeName
);
130 * Call {@link SupportType#valueOf(String.toUpperCase())} but return
131 * NULL in case of error instead of raising an exception.
134 * the possible type name
136 * @return NULL or the type
138 public static SupportType
valueOfAllOkUC(String typeName
) {
140 return SupportType
.valueOfUC(typeName
);
141 } catch (Exception e
) {
147 private InputStream in
;
148 private SupportType type
;
149 private URL currentReferer
; // with only one 'r', as in 'HTTP'...
152 private char openQuote
= Instance
.getTrans().getChar(
153 StringId
.OPEN_SINGLE_QUOTE
);
154 private char closeQuote
= Instance
.getTrans().getChar(
155 StringId
.CLOSE_SINGLE_QUOTE
);
156 private char openDoubleQuote
= Instance
.getTrans().getChar(
157 StringId
.OPEN_DOUBLE_QUOTE
);
158 private char closeDoubleQuote
= Instance
.getTrans().getChar(
159 StringId
.CLOSE_DOUBLE_QUOTE
);
162 * The name of this support class.
166 protected abstract String
getSourceName();
169 * Check if the given resource is supported by this {@link BasicSupport}.
172 * the resource to check for
174 * @return TRUE if it is
176 protected abstract boolean supports(URL url
);
179 * Return TRUE if the support will return HTML encoded content values for
180 * the chapters content.
182 * @return TRUE for HTML
184 protected abstract boolean isHtml();
186 protected abstract MetaData
getMeta(URL source
, InputStream in
)
190 * Return the story description.
193 * the source of the story
195 * the input (the main resource)
197 * @return the description
199 * @throws IOException
200 * in case of I/O error
202 protected abstract String
getDesc(URL source
, InputStream in
)
206 * Return the list of chapters (name and resource).
209 * the source of the story
211 * the input (the main resource)
213 * @return the chapters
215 * @throws IOException
216 * in case of I/O error
218 protected abstract List
<Entry
<String
, URL
>> getChapters(URL source
,
219 InputStream in
) throws IOException
;
222 * Return the content of the chapter (possibly HTML encoded, if
223 * {@link BasicSupport#isHtml()} is TRUE).
226 * the source of the story
228 * the input (the main resource)
232 * @return the content
234 * @throws IOException
235 * in case of I/O error
237 protected abstract String
getChapterContent(URL source
, InputStream in
,
238 int number
) throws IOException
;
241 * Return the list of cookies (values included) that must be used to
242 * correctly fetch the resources.
244 * You are expected to call the super method implementation if you override
247 * @return the cookies
249 public Map
<String
, String
> getCookies() {
250 return new HashMap
<String
, String
>();
254 * Process the given story resource into a partially filled {@link Story}
255 * object containing the name and metadata, except for the description.
260 * @return the {@link Story}
262 * @throws IOException
263 * in case of I/O error
265 public Story
processMeta(URL url
) throws IOException
{
266 return processMeta(url
, true, false);
270 * Process the given story resource into a partially filled {@link Story}
271 * object containing the name and metadata.
277 * close "this" and "in" when done
279 * @return the {@link Story}
281 * @throws IOException
282 * in case of I/O error
284 protected Story
processMeta(URL url
, boolean close
, boolean getDesc
)
286 in
= Instance
.getCache().open(url
, this, false);
292 preprocess(url
, getInput());
294 Story story
= new Story();
295 MetaData meta
= getMeta(url
, getInput());
298 if (meta
!= null && meta
.getCover() == null) {
299 meta
.setCover(getDefaultCover(meta
.getSubject()));
303 String descChapterName
= Instance
.getTrans().getString(
304 StringId
.DESCRIPTION
);
305 story
.getMeta().setResume(
306 makeChapter(url
, 0, descChapterName
,
307 getDesc(url
, getInput())));
315 } catch (IOException e
) {
327 * Process the given story resource into a fully filled {@link Story}
333 * the optional progress reporter
335 * @return the {@link Story}
337 * @throws IOException
338 * in case of I/O error
340 public Story
process(URL url
, Progress pg
) throws IOException
{
344 pg
.setMinMax(0, 100);
347 setCurrentReferer(url
);
351 Story story
= processMeta(url
, false, true);
358 story
.setChapters(new ArrayList
<Chapter
>());
360 List
<Entry
<String
, URL
>> chapters
= getChapters(url
, getInput());
364 if (chapters
!= null) {
365 Progress pgChaps
= new Progress(0, chapters
.size());
366 pg
.addProgress(pgChaps
, 80);
368 for (Entry
<String
, URL
> chap
: chapters
) {
369 setCurrentReferer(chap
.getValue());
370 InputStream chapIn
= Instance
.getCache().open(
371 chap
.getValue(), this, true);
373 story
.getChapters().add(
374 makeChapter(url
, i
, chap
.getKey(),
375 getChapterContent(url
, chapIn
, i
)));
380 pgChaps
.setProgress(i
);
392 } catch (IOException e
) {
400 currentReferer
= null;
409 public SupportType
getType() {
414 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
415 * the current {@link URL} we work on.
417 * @return the referer
419 public URL
getCurrentReferer() {
420 return currentReferer
;
424 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
425 * the current {@link URL} we work on.
427 * @param currentReferer
430 protected void setCurrentReferer(URL currentReferer
) {
431 this.currentReferer
= currentReferer
;
442 protected BasicSupport
setType(SupportType type
) {
448 * Prepare the support if needed before processing.
451 * the source of the story
453 * the input (the main resource)
455 * @throws IOException
458 protected void preprocess(URL source
, InputStream in
) throws IOException
{
462 * Now that we have processed the {@link Story}, close the resources if any.
464 * @throws IOException
467 protected void close() throws IOException
{
471 * Create a {@link Chapter} object from the given information, formatting
472 * the content as it should be.
479 * the chapter content
481 * @return the {@link Chapter}
483 * @throws IOException
484 * in case of I/O error
486 protected Chapter
makeChapter(URL source
, int number
, String name
,
487 String content
) throws IOException
{
488 // Chapter name: process it correctly, then remove the possible
489 // redundant "Chapter x: " in front of it
490 String chapterName
= processPara(name
).getContent().trim();
491 for (String lang
: Instance
.getConfig().getString(Config
.CHAPTER
)
493 String chapterWord
= Instance
.getConfig().getStringX(
494 Config
.CHAPTER
, lang
);
495 if (chapterName
.startsWith(chapterWord
)) {
496 chapterName
= chapterName
.substring(chapterWord
.length())
502 if (chapterName
.startsWith(Integer
.toString(number
))) {
503 chapterName
= chapterName
.substring(
504 Integer
.toString(number
).length()).trim();
507 if (chapterName
.startsWith(":")) {
508 chapterName
= chapterName
.substring(1).trim();
512 Chapter chap
= new Chapter(number
, chapterName
);
514 if (content
== null) {
519 // Special <HR> processing:
520 content
= content
.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
524 InputStream in
= new ByteArrayInputStream(content
.getBytes("UTF-8"));
526 @SuppressWarnings("resource")
527 Scanner scan
= new Scanner(in
, "UTF-8");
528 scan
.useDelimiter("(\\n|</p>)"); // \n for test, </p> for html
530 List
<Paragraph
> paras
= new ArrayList
<Paragraph
>();
531 while (scan
.hasNext()) {
532 String line
= scan
.next().trim();
533 boolean image
= false;
534 if (line
.startsWith("[") && line
.endsWith("]")) {
535 URL url
= getImageUrl(this, source
,
536 line
.substring(1, line
.length() - 1).trim());
538 paras
.add(new Paragraph(url
));
544 paras
.add(processPara(line
));
548 // Check quotes for "bad" format
549 List
<Paragraph
> newParas
= new ArrayList
<Paragraph
>();
550 for (Paragraph para
: paras
) {
551 newParas
.addAll(requotify(para
));
555 // Remove double blanks/brks
556 boolean space
= false;
558 for (int i
= 0; i
< paras
.size(); i
++) {
559 Paragraph para
= paras
.get(i
);
560 boolean thisSpace
= para
.getType() == ParagraphType
.BLANK
;
561 boolean thisBrk
= para
.getType() == ParagraphType
.BREAK
;
563 if (space
&& thisBrk
) {
566 } else if ((space
|| brk
) && (thisSpace
|| thisBrk
)) {
575 // Remove blank/brk at start
577 && (paras
.get(0).getType() == ParagraphType
.BLANK
|| paras
578 .get(0).getType() == ParagraphType
.BREAK
)) {
582 // Remove blank/brk at end
583 int last
= paras
.size() - 1;
585 && (paras
.get(last
).getType() == ParagraphType
.BLANK
|| paras
586 .get(last
).getType() == ParagraphType
.BREAK
)) {
590 chap
.setParagraphs(paras
);
598 static BufferedImage
getDefaultCover(String subject
) {
599 if (subject
!= null && !subject
.isEmpty()
600 && Instance
.getCoverDir() != null) {
602 File fileCover
= new File(Instance
.getCoverDir(), subject
);
603 return getImage(null, fileCover
.toURI().toURL(), subject
);
604 } catch (MalformedURLException e
) {
612 * Return the list of supported image extensions.
614 * @return the extensions
616 static String
[] getImageExt(boolean emptyAllowed
) {
618 return new String
[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
620 return new String
[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
624 static BufferedImage
getImage(BasicSupport support
, URL source
, String line
) {
625 URL url
= getImageUrl(support
, source
, line
);
627 InputStream in
= null;
629 in
= Instance
.getCache().open(url
, getSupport(url
), true);
630 return IOUtils
.toImage(in
);
631 } catch (IOException e
) {
636 } catch (IOException e
) {
646 * Check if the given resource can be a local image or a remote image, then
647 * refresh the cache with it if it is.
652 * the resource to check
654 * @return the image URL if found, or NULL
657 static URL
getImageUrl(BasicSupport support
, URL source
, String line
) {
663 if (source
!= null) {
664 path
= new File(source
.getFile()).getParent();
666 String basePath
= new File(new File(path
), line
.trim())
668 for (String ext
: getImageExt(true)) {
669 if (new File(basePath
+ ext
).exists()) {
670 url
= new File(basePath
+ ext
).toURI().toURL();
673 } catch (Exception e
) {
674 // Nothing to do here
681 for (String ext
: getImageExt(true)) {
682 if (Instance
.getCache().check(new URL(line
+ ext
))) {
683 url
= new URL(line
+ ext
);
690 for (String ext
: getImageExt(true)) {
692 url
= new URL(line
+ ext
);
693 Instance
.getCache().refresh(url
, support
, true);
695 } catch (IOException e
) {
696 // no image with this ext
701 } catch (MalformedURLException e
) {
706 // refresh the cached file
709 Instance
.getCache().refresh(url
, support
, true);
710 } catch (IOException e
) {
711 // woops, broken image
720 protected InputStream
reset(InputStream in
) {
723 } catch (IOException e
) {
729 * Reset then return {@link BasicSupport#in}.
731 * @return {@link BasicSupport#in}
733 protected InputStream
getInput() {
738 * Fix the author name if it is prefixed with some "by" {@link String}.
741 * the author with a possible prefix
743 * @return the author without prefixes
745 protected String
fixAuthor(String author
) {
746 if (author
!= null) {
747 for (String suffix
: new String
[] { " ", ":" }) {
748 for (String byString
: Instance
.getConfig()
749 .getString(Config
.BYS
).split(",")) {
751 if (author
.toUpperCase().startsWith(byString
.toUpperCase())) {
752 author
= author
.substring(byString
.length()).trim();
757 // Special case (without suffix):
758 if (author
.startsWith("©")) {
759 author
= author
.substring(1);
767 * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
768 * and requotify them (i.e., separate them into QUOTE paragraphs and other
769 * paragraphs (quotes or not)).
772 * the paragraph to requotify (not necessarily a quote)
774 * @return the correctly (or so we hope) quotified paragraphs
776 private List
<Paragraph
> requotify(Paragraph para
) {
777 List
<Paragraph
> newParas
= new ArrayList
<Paragraph
>();
779 if (para
.getType() == ParagraphType
.QUOTE
780 && para
.getContent().length() > 2) {
781 String line
= para
.getContent();
782 boolean singleQ
= line
.startsWith("" + openQuote
);
783 boolean doubleQ
= line
.startsWith("" + openDoubleQuote
);
785 // Do not try when more than one quote at a time
786 // (some stories are not easily readable if we do)
788 && line
.indexOf(closeQuote
, 1) < line
789 .lastIndexOf(closeQuote
)) {
794 && line
.indexOf(closeDoubleQuote
, 1) < line
795 .lastIndexOf(closeDoubleQuote
)) {
801 if (!singleQ
&& !doubleQ
) {
802 line
= openDoubleQuote
+ line
+ closeDoubleQuote
;
803 newParas
.add(new Paragraph(ParagraphType
.QUOTE
, line
));
805 char open
= singleQ ? openQuote
: openDoubleQuote
;
806 char close
= singleQ ? closeQuote
: closeDoubleQuote
;
809 boolean inQuote
= false;
811 for (char car
: line
.toCharArray()) {
814 } else if (car
== close
) {
816 } else if (car
== '.' && !inQuote
) {
824 String rest
= line
.substring(posDot
+ 1).trim();
825 line
= line
.substring(0, posDot
+ 1).trim();
826 newParas
.add(new Paragraph(ParagraphType
.QUOTE
, line
));
827 if (!rest
.isEmpty()) {
828 newParas
.addAll(requotify(processPara(rest
)));
842 * Process a {@link Paragraph} from a raw line of text.
844 * Will also fix quotes and HTML encoding if needed.
849 * @return the processed {@link Paragraph}
851 private Paragraph
processPara(String line
) {
852 line
= ifUnhtml(line
).trim();
854 boolean space
= true;
856 boolean quote
= false;
857 boolean tentativeCloseQuote
= false;
861 StringBuilder builder
= new StringBuilder();
862 for (char car
: line
.toCharArray()) {
865 // dash, ndash and mdash: - – —
866 // currently: always use mdash
867 builder
.append(dashCount
== 1 ?
'-' : '—');
872 if (tentativeCloseQuote
) {
873 tentativeCloseQuote
= false;
874 if ((car
>= 'a' && car
<= 'z') || (car
>= 'A' && car
<= 'Z')
875 || (car
>= '0' && car
<= '9')) {
878 builder
.append(closeQuote
);
883 case ' ': // note: unbreakable space
886 case '\n': // just in case
887 case '\r': // just in case
892 if (space
|| (brk
&& quote
)) {
894 builder
.append(openQuote
);
895 } else if (prev
== ' ') {
896 builder
.append(openQuote
);
898 // it is a quote ("I'm off") or a 'quote' ("This
899 // 'good' restaurant"...)
900 tentativeCloseQuote
= true;
905 if (space
|| (brk
&& quote
)) {
907 builder
.append(openDoubleQuote
);
908 } else if (prev
== ' ') {
909 builder
.append(openDoubleQuote
);
911 builder
.append(closeDoubleQuote
);
945 if (space
|| (brk
&& quote
)) {
947 builder
.append(openQuote
);
949 builder
.append(openQuote
);
962 builder
.append(closeQuote
);
970 if (space
|| (brk
&& quote
)) {
972 builder
.append(openDoubleQuote
);
974 builder
.append(openDoubleQuote
);
987 builder
.append(closeDoubleQuote
);
1000 if (tentativeCloseQuote
) {
1001 tentativeCloseQuote
= false;
1002 builder
.append(closeQuote
);
1005 line
= builder
.toString().trim();
1007 ParagraphType type
= ParagraphType
.NORMAL
;
1009 type
= ParagraphType
.BLANK
;
1011 type
= ParagraphType
.BREAK
;
1013 type
= ParagraphType
.QUOTE
;
1016 return new Paragraph(type
, line
);
1020 * Remove the HTML from the inpit <b>if</b> {@link BasicSupport#isHtml()} is
1026 * @return the no html version if needed
1028 private String
ifUnhtml(String input
) {
1029 if (isHtml() && input
!= null) {
1030 return StringUtils
.unhtml(input
);
1037 * Return a {@link BasicSupport} implementation supporting the given
1038 * resource if possible.
1041 * the story resource
1043 * @return an implementation that supports it, or NULL
1045 public static BasicSupport
getSupport(URL url
) {
1050 // TEXT and INFO_TEXT always support files (not URLs though)
1051 for (SupportType type
: SupportType
.values()) {
1052 if (type
!= SupportType
.TEXT
&& type
!= SupportType
.INFO_TEXT
) {
1053 BasicSupport support
= getSupport(type
);
1054 if (support
!= null && support
.supports(url
)) {
1060 for (SupportType type
: new SupportType
[] { SupportType
.TEXT
,
1061 SupportType
.INFO_TEXT
}) {
1062 BasicSupport support
= getSupport(type
);
1063 if (support
!= null && support
.supports(url
)) {
1072 * Return a {@link BasicSupport} implementation supporting the given type.
1077 * @return an implementation that supports it, or NULL
1079 public static BasicSupport
getSupport(SupportType type
) {
1082 return new Epub().setType(type
);
1084 return new InfoText().setType(type
);
1086 return new Fimfiction().setType(type
);
1088 return new Fanfiction().setType(type
);
1090 return new Text().setType(type
);
1092 return new MangaFox().setType(type
);
1094 return new E621().setType(type
);
1096 return new Cbz().setType(type
);
1103 * Return the first line from the given input which correspond to the given
1109 * a string that must be found inside the target line (also
1110 * supports "^" at start to say "only if it starts with" the
1112 * @param relativeLine
1113 * the line to return based upon the target line position (-1 =
1114 * the line before, 0 = the target line...)
1118 static String
getLine(InputStream in
, String needle
, int relativeLine
) {
1119 return getLine(in
, needle
, relativeLine
, true);
1123 * Return a line from the given input which corresponds to the given
1129 * a string that must be found inside the target line (also
1130 * supports "^" at start to say "only if it starts with" the
1132 * @param relativeLine
1133 * the line to return based upon the target line position (-1 =
1134 * the line before, 0 = the target line...)
1136 * takes the first result (as opposed to the last one, which will
1137 * also always spend the input)
1141 static String
getLine(InputStream in
, String needle
, int relativeLine
,
1147 } catch (IOException e
) {
1151 List
<String
> lines
= new ArrayList
<String
>();
1152 @SuppressWarnings("resource")
1153 Scanner scan
= new Scanner(in
, "UTF-8");
1155 scan
.useDelimiter("\\n");
1156 while (scan
.hasNext()) {
1157 lines
.add(scan
.next());
1160 if (needle
.startsWith("^")) {
1161 if (lines
.get(lines
.size() - 1).startsWith(
1162 needle
.substring(1))) {
1163 index
= lines
.size() - 1;
1167 if (lines
.get(lines
.size() - 1).contains(needle
)) {
1168 index
= lines
.size() - 1;
1173 if (index
>= 0 && index
+ relativeLine
< lines
.size()) {
1174 rep
= lines
.get(index
+ relativeLine
);