1 package be
.nikiroo
.fanfix
.supported
;
3 import java
.awt
.image
.BufferedImage
;
4 import java
.io
.ByteArrayInputStream
;
6 import java
.io
.IOException
;
7 import java
.io
.InputStream
;
8 import java
.net
.MalformedURLException
;
10 import java
.util
.ArrayList
;
11 import java
.util
.HashMap
;
12 import java
.util
.List
;
14 import java
.util
.Map
.Entry
;
15 import java
.util
.Scanner
;
17 import be
.nikiroo
.fanfix
.Instance
;
18 import be
.nikiroo
.fanfix
.bundles
.Config
;
19 import be
.nikiroo
.fanfix
.bundles
.StringId
;
20 import be
.nikiroo
.fanfix
.data
.Chapter
;
21 import be
.nikiroo
.fanfix
.data
.MetaData
;
22 import be
.nikiroo
.fanfix
.data
.Paragraph
;
23 import be
.nikiroo
.fanfix
.data
.Paragraph
.ParagraphType
;
24 import be
.nikiroo
.fanfix
.data
.Story
;
25 import be
.nikiroo
.utils
.IOUtils
;
26 import be
.nikiroo
.utils
.StringUtils
;
29 * This class is the base class used by the other support classes. It can be
30 * used outside of this package, and has static methods that you can use to get
31 * access to the correct support class.
33 * It will be used with 'resources' (usually web pages or files).
37 public abstract class BasicSupport
{
39 * The supported input types for which we can get a {@link BasicSupport}
44 public enum SupportType
{
45 /** EPUB files created with this program */
47 /** Pure text file with some rules */
49 /** TEXT but with associated .info file */
51 /** My Little Pony fanfictions */
53 /** Fanfictions from a lot of different universes */
55 /** Website with lots of Mangas */
57 /** Furry website with comics support */
63 * A description of this support type (more information than the
64 * {@link BasicSupport#getSourceName()}).
66 * @return the description
68 public String
getDesc() {
69 String desc
= Instance
.getTrans().getStringX(StringId
.INPUT_DESC
,
73 desc
= Instance
.getTrans().getString(StringId
.INPUT_DESC
, this);
80 * The name of this support type (a short version).
84 public String
getSourceName() {
85 BasicSupport support
= BasicSupport
.getSupport(this);
86 if (support
!= null) {
87 return support
.getSourceName();
94 public String
toString() {
95 return super.toString().toLowerCase();
99 * Call {@link SupportType#valueOf(String.toUpperCase())}.
102 * the possible type name
104 * @return NULL or the type
106 public static SupportType
valueOfUC(String typeName
) {
107 return SupportType
.valueOf(typeName
== null ?
null : typeName
112 * Call {@link SupportType#valueOf(String.toUpperCase())} but return
113 * NULL for NULL instead of raising exception.
116 * the possible type name
118 * @return NULL or the type
120 public static SupportType
valueOfNullOkUC(String typeName
) {
121 if (typeName
== null) {
125 return SupportType
.valueOfUC(typeName
);
129 * Call {@link SupportType#valueOf(String.toUpperCase())} but return
130 * NULL in case of error instead of raising an exception.
133 * the possible type name
135 * @return NULL or the type
137 public static SupportType
valueOfAllOkUC(String typeName
) {
139 return SupportType
.valueOfUC(typeName
);
140 } catch (Exception e
) {
146 private InputStream in
;
147 private SupportType type
;
148 private URL currentReferer
; // with one 'r', as in 'HTTP'...
151 private char openQuote
= Instance
.getTrans().getChar(
152 StringId
.OPEN_SINGLE_QUOTE
);
153 private char closeQuote
= Instance
.getTrans().getChar(
154 StringId
.CLOSE_SINGLE_QUOTE
);
155 private char openDoubleQuote
= Instance
.getTrans().getChar(
156 StringId
.OPEN_DOUBLE_QUOTE
);
157 private char closeDoubleQuote
= Instance
.getTrans().getChar(
158 StringId
.CLOSE_DOUBLE_QUOTE
);
161 * The name of this support class.
165 protected abstract String
getSourceName();
168 * Check if the given resource is supported by this {@link BasicSupport}.
171 * the resource to check for
173 * @return TRUE if it is
175 protected abstract boolean supports(URL url
);
178 * Return TRUE if the support will return HTML encoded content values for
179 * the chapters content.
181 * @return TRUE for HTML
183 protected abstract boolean isHtml();
185 protected abstract MetaData
getMeta(URL source
, InputStream in
)
189 * Return the story description.
192 * the source of the story
194 * the input (the main resource)
196 * @return the description
198 * @throws IOException
199 * in case of I/O error
201 protected abstract String
getDesc(URL source
, InputStream in
)
205 * Return the list of chapters (name and resource).
208 * the source of the story
210 * the input (the main resource)
212 * @return the chapters
214 * @throws IOException
215 * in case of I/O error
217 protected abstract List
<Entry
<String
, URL
>> getChapters(URL source
,
218 InputStream in
) throws IOException
;
221 * Return the content of the chapter (possibly HTML encoded, if
222 * {@link BasicSupport#isHtml()} is TRUE).
225 * the source of the story
227 * the input (the main resource)
231 * @return the content
233 * @throws IOException
234 * in case of I/O error
236 protected abstract String
getChapterContent(URL source
, InputStream in
,
237 int number
) throws IOException
;
240 * Return the list of cookies (values included) that must be used to
241 * correctly fetch the resources.
243 * You are expected to call the super method implementation if you override
246 * @return the cookies
248 public Map
<String
, String
> getCookies() {
249 return new HashMap
<String
, String
>();
253 * Process the given story resource into a partially filled {@link Story}
254 * object containing the name and metadata, except for the description.
259 * @return the {@link Story}
261 * @throws IOException
262 * in case of I/O error
264 public Story
processMeta(URL url
) throws IOException
{
265 return processMeta(url
, true, false);
269 * Process the given story resource into a partially filled {@link Story}
270 * object containing the name and metadata.
276 * close "this" and "in" when done
278 * @return the {@link Story}
280 * @throws IOException
281 * in case of I/O error
283 protected Story
processMeta(URL url
, boolean close
, boolean getDesc
)
285 in
= Instance
.getCache().open(url
, this, false);
291 preprocess(url
, getInput());
293 Story story
= new Story();
294 MetaData meta
= getMeta(url
, getInput());
297 if (meta
!= null && meta
.getCover() == null) {
298 meta
.setCover(getDefaultCover(meta
.getSubject()));
302 String descChapterName
= Instance
.getTrans().getString(
303 StringId
.DESCRIPTION
);
304 story
.getMeta().setResume(
305 makeChapter(url
, 0, descChapterName
,
306 getDesc(url
, getInput())));
314 } catch (IOException e
) {
326 * Process the given story resource into a fully filled {@link Story}
332 * @return the {@link Story}
334 * @throws IOException
335 * in case of I/O error
337 public Story
process(URL url
) throws IOException
{
338 setCurrentReferer(url
);
341 Story story
= processMeta(url
, false, true);
346 story
.setChapters(new ArrayList
<Chapter
>());
348 List
<Entry
<String
, URL
>> chapters
= getChapters(url
, getInput());
350 if (chapters
!= null) {
351 for (Entry
<String
, URL
> chap
: chapters
) {
352 setCurrentReferer(chap
.getValue());
353 InputStream chapIn
= Instance
.getCache().open(
354 chap
.getValue(), this, true);
356 story
.getChapters().add(
357 makeChapter(url
, i
, chap
.getKey(),
358 getChapterContent(url
, chapIn
, i
)));
372 } catch (IOException e
) {
380 currentReferer
= null;
389 public SupportType
getType() {
394 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
395 * the current {@link URL} we work on.
397 * @return the referer
399 public URL
getCurrentReferer() {
400 return currentReferer
;
404 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
405 * the current {@link URL} we work on.
407 * @param currentReferer
410 protected void setCurrentReferer(URL currentReferer
) {
411 this.currentReferer
= currentReferer
;
422 protected BasicSupport
setType(SupportType type
) {
428 * Prepare the support if needed before processing.
431 * the source of the story
433 * the input (the main resource)
435 * @throws IOException
438 protected void preprocess(URL source
, InputStream in
) throws IOException
{
442 * Now that we have processed the {@link Story}, close the resources if any.
444 * @throws IOException
447 protected void close() throws IOException
{
451 * Create a {@link Chapter} object from the given information, formatting
452 * the content as it should be.
459 * the chapter content
461 * @return the {@link Chapter}
463 * @throws IOException
464 * in case of I/O error
466 protected Chapter
makeChapter(URL source
, int number
, String name
,
467 String content
) throws IOException
{
469 // Chapter name: process it correctly, then remove the possible
470 // redundant "Chapter x: " in front of it
471 String chapterName
= processPara(name
).getContent().trim();
472 for (String lang
: Instance
.getConfig().getString(Config
.CHAPTER
)
474 String chapterWord
= Instance
.getConfig().getStringX(
475 Config
.CHAPTER
, lang
);
476 if (chapterName
.startsWith(chapterWord
)) {
477 chapterName
= chapterName
.substring(chapterWord
.length())
483 if (chapterName
.startsWith(Integer
.toString(number
))) {
484 chapterName
= chapterName
.substring(
485 Integer
.toString(number
).length()).trim();
488 if (chapterName
.startsWith(":")) {
489 chapterName
= chapterName
.substring(1).trim();
493 Chapter chap
= new Chapter(number
, chapterName
);
495 if (content
== null) {
500 // Special <HR> processing:
501 content
= content
.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
505 InputStream in
= new ByteArrayInputStream(content
.getBytes("UTF-8"));
507 @SuppressWarnings("resource")
508 Scanner scan
= new Scanner(in
, "UTF-8");
509 scan
.useDelimiter("(\\n|</p>)"); // \n for test, </p> for html
511 List
<Paragraph
> paras
= new ArrayList
<Paragraph
>();
512 while (scan
.hasNext()) {
513 String line
= scan
.next().trim();
514 boolean image
= false;
515 if (line
.startsWith("[") && line
.endsWith("]")) {
516 URL url
= getImageUrl(this, source
,
517 line
.substring(1, line
.length() - 1).trim());
519 paras
.add(new Paragraph(url
));
525 paras
.add(processPara(line
));
529 // Check quotes for "bad" format
530 List
<Paragraph
> newParas
= new ArrayList
<Paragraph
>();
531 for (Paragraph para
: paras
) {
532 newParas
.addAll(requotify(para
));
536 // Remove double blanks/brks
537 boolean space
= false;
539 for (int i
= 0; i
< paras
.size(); i
++) {
540 Paragraph para
= paras
.get(i
);
541 boolean thisSpace
= para
.getType() == ParagraphType
.BLANK
;
542 boolean thisBrk
= para
.getType() == ParagraphType
.BREAK
;
544 if (space
&& thisBrk
) {
547 } else if ((space
|| brk
) && (thisSpace
|| thisBrk
)) {
556 // Remove blank/brk at start
558 && (paras
.get(0).getType() == ParagraphType
.BLANK
|| paras
559 .get(0).getType() == ParagraphType
.BREAK
)) {
563 // Remove blank/brk at end
564 int last
= paras
.size() - 1;
566 && (paras
.get(last
).getType() == ParagraphType
.BLANK
|| paras
567 .get(last
).getType() == ParagraphType
.BREAK
)) {
571 chap
.setParagraphs(paras
);
579 static BufferedImage
getDefaultCover(String subject
) {
580 if (subject
!= null && !subject
.isEmpty()
581 && Instance
.getCoverDir() != null) {
583 File fileCover
= new File(Instance
.getCoverDir(), subject
);
584 return getImage(null, fileCover
.toURI().toURL(), subject
);
585 } catch (MalformedURLException e
) {
593 * Return the list of supported image extensions.
595 * @return the extensions
597 static String
[] getImageExt(boolean emptyAllowed
) {
599 return new String
[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
601 return new String
[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
605 static BufferedImage
getImage(BasicSupport support
, URL source
, String line
) {
606 URL url
= getImageUrl(support
, source
, line
);
608 InputStream in
= null;
610 in
= Instance
.getCache().open(url
, getSupport(url
), true);
611 return IOUtils
.toImage(in
);
612 } catch (IOException e
) {
617 } catch (IOException e
) {
627 * Check if the given resource can be a local image or a remote image, then
628 * refresh the cache with it if it is.
633 * the resource to check
635 * @return the image URL if found, or NULL
638 static URL
getImageUrl(BasicSupport support
, URL source
, String line
) {
644 if (source
!= null) {
645 path
= new File(source
.getFile()).getParent();
647 String basePath
= new File(new File(path
), line
.trim())
649 for (String ext
: getImageExt(true)) {
650 if (new File(basePath
+ ext
).exists()) {
651 url
= new File(basePath
+ ext
).toURI().toURL();
654 } catch (Exception e
) {
655 // Nothing to do here
662 for (String ext
: getImageExt(true)) {
663 if (Instance
.getCache().check(new URL(line
+ ext
))) {
664 url
= new URL(line
+ ext
);
671 for (String ext
: getImageExt(true)) {
673 url
= new URL(line
+ ext
);
674 Instance
.getCache().refresh(url
, support
, true);
676 } catch (IOException e
) {
677 // no image with this ext
682 } catch (MalformedURLException e
) {
687 // refresh the cached file
690 Instance
.getCache().refresh(url
, support
, true);
691 } catch (IOException e
) {
692 // woops, broken image
701 protected InputStream
reset(InputStream in
) {
704 } catch (IOException e
) {
710 * Reset then return {@link BasicSupport#in}.
712 * @return {@link BasicSupport#in}
714 protected InputStream
getInput() {
719 * Fix the author name if it is prefixed with some "by" {@link String}.
722 * the author with a possible prefix
724 * @return the author without prefixes
726 protected String
fixAuthor(String author
) {
727 if (author
!= null) {
728 for (String suffix
: new String
[] { " ", ":" }) {
729 for (String byString
: Instance
.getConfig()
730 .getString(Config
.BYS
).split(",")) {
732 if (author
.toUpperCase().startsWith(byString
.toUpperCase())) {
733 author
= author
.substring(byString
.length()).trim();
738 // Special case (without suffix):
739 if (author
.startsWith("©")) {
740 author
= author
.substring(1);
748 * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
749 * and requotify them (i.e., separate them into QUOTE paragraphs and other
750 * paragraphs (quotes or not)).
753 * the paragraph to requotify (not necessarily a quote)
755 * @return the correctly (or so we hope) quotified paragraphs
757 private List
<Paragraph
> requotify(Paragraph para
) {
758 List
<Paragraph
> newParas
= new ArrayList
<Paragraph
>();
760 if (para
.getType() == ParagraphType
.QUOTE
761 && para
.getContent().length() > 2) {
762 String line
= para
.getContent();
763 boolean singleQ
= line
.startsWith("" + openQuote
);
764 boolean doubleQ
= line
.startsWith("" + openDoubleQuote
);
766 if (!singleQ
&& !doubleQ
) {
767 line
= openDoubleQuote
+ line
+ closeDoubleQuote
;
768 newParas
.add(new Paragraph(ParagraphType
.QUOTE
, line
));
770 char open
= singleQ ? openQuote
: openDoubleQuote
;
771 char close
= singleQ ? closeQuote
: closeDoubleQuote
;
774 boolean inQuote
= false;
776 for (char car
: line
.toCharArray()) {
779 } else if (car
== close
) {
781 } else if (car
== '.' && !inQuote
) {
789 String rest
= line
.substring(posDot
+ 1).trim();
790 line
= line
.substring(0, posDot
+ 1).trim();
791 newParas
.add(new Paragraph(ParagraphType
.QUOTE
, line
));
792 if (!rest
.isEmpty()) {
793 newParas
.addAll(requotify(processPara(rest
)));
807 * Process a {@link Paragraph} from a raw line of text.
809 * Will also fix quotes and HTML encoding if needed.
814 * @return the processed {@link Paragraph}
816 private Paragraph
processPara(String line
) {
817 line
= ifUnhtml(line
).trim();
819 boolean space
= true;
821 boolean quote
= false;
822 boolean tentativeCloseQuote
= false;
826 StringBuilder builder
= new StringBuilder();
827 for (char car
: line
.toCharArray()) {
830 // dash, ndash and mdash: - – —
831 // currently: always use mdash
832 builder
.append(dashCount
== 1 ?
'-' : '—');
837 if (tentativeCloseQuote
) {
838 tentativeCloseQuote
= false;
839 if ((car
>= 'a' && car
<= 'z') || (car
>= 'A' && car
<= 'Z')
840 || (car
>= '0' && car
<= '9')) {
843 builder
.append(closeQuote
);
848 case ' ': // note: unbreakable space
851 case '\n': // just in case
852 case '\r': // just in case
857 if (space
|| (brk
&& quote
)) {
859 builder
.append(openQuote
);
860 } else if (prev
== ' ') {
861 builder
.append(openQuote
);
863 // it is a quote ("I'm off") or a 'quote' ("This
864 // 'good' restaurant"...)
865 tentativeCloseQuote
= true;
870 if (space
|| (brk
&& quote
)) {
872 builder
.append(openDoubleQuote
);
873 } else if (prev
== ' ') {
874 builder
.append(openDoubleQuote
);
876 builder
.append(closeDoubleQuote
);
910 if (space
|| (brk
&& quote
)) {
912 builder
.append(openQuote
);
914 builder
.append(openQuote
);
927 builder
.append(closeQuote
);
935 if (space
|| (brk
&& quote
)) {
937 builder
.append(openDoubleQuote
);
939 builder
.append(openDoubleQuote
);
952 builder
.append(closeDoubleQuote
);
965 if (tentativeCloseQuote
) {
966 tentativeCloseQuote
= false;
967 builder
.append(closeQuote
);
970 line
= builder
.toString().trim();
972 ParagraphType type
= ParagraphType
.NORMAL
;
974 type
= ParagraphType
.BLANK
;
976 type
= ParagraphType
.BREAK
;
978 type
= ParagraphType
.QUOTE
;
981 return new Paragraph(type
, line
);
985 * Remove the HTML from the input <b>if</b> {@link BasicSupport#isHtml()} is
991 * @return the no html version if needed
993 private String
ifUnhtml(String input
) {
994 if (isHtml() && input
!= null) {
995 return StringUtils
.unhtml(input
);
1002 * Return a {@link BasicSupport} implementation supporting the given
1003 * resource if possible.
1006 * the story resource
1008 * @return an implementation that supports it, or NULL
1010 public static BasicSupport
getSupport(URL url
) {
1015 // TEXT and INFO_TEXT always support files (not URLs though)
1016 for (SupportType type
: SupportType
.values()) {
1017 if (type
!= SupportType
.TEXT
&& type
!= SupportType
.INFO_TEXT
) {
1018 BasicSupport support
= getSupport(type
);
1019 if (support
!= null && support
.supports(url
)) {
1025 for (SupportType type
: new SupportType
[] { SupportType
.TEXT
,
1026 SupportType
.INFO_TEXT
}) {
1027 BasicSupport support
= getSupport(type
);
1028 if (support
!= null && support
.supports(url
)) {
1037 * Return a {@link BasicSupport} implementation supporting the given type.
1042 * @return an implementation that supports it, or NULL
1044 public static BasicSupport
getSupport(SupportType type
) {
1047 return new Epub().setType(type
);
1049 return new InfoText().setType(type
);
1051 return new Fimfiction().setType(type
);
1053 return new Fanfiction().setType(type
);
1055 return new Text().setType(type
);
1057 return new MangaFox().setType(type
);
1059 return new E621().setType(type
);
1061 return new Cbz().setType(type
);
1068 * Return the first line from the given input which correspond to the given
1074 * a string that must be found inside the target line (also
1075 * supports "^" at start to say "only if it starts with" the
1077 * @param relativeLine
1078 * the line to return based upon the target line position (-1 =
1079 * the line before, 0 = the target line...)
1083 static String
getLine(InputStream in
, String needle
, int relativeLine
) {
1084 return getLine(in
, needle
, relativeLine
, true);
1088 * Return a line from the given input which corresponds to the given
1094 * a string that must be found inside the target line (also
1095 * supports "^" at start to say "only if it starts with" the
1097 * @param relativeLine
1098 * the line to return based upon the target line position (-1 =
1099 * the line before, 0 = the target line...)
1101 * takes the first result (as opposed to the last one, which will
1102 * also always spend the input)
1106 static String
getLine(InputStream in
, String needle
, int relativeLine
,
1112 } catch (IOException e
) {
1116 List
<String
> lines
= new ArrayList
<String
>();
1117 @SuppressWarnings("resource")
1118 Scanner scan
= new Scanner(in
, "UTF-8");
1120 scan
.useDelimiter("\\n");
1121 while (scan
.hasNext()) {
1122 lines
.add(scan
.next());
1125 if (needle
.startsWith("^")) {
1126 if (lines
.get(lines
.size() - 1).startsWith(
1127 needle
.substring(1))) {
1128 index
= lines
.size() - 1;
1132 if (lines
.get(lines
.size() - 1).contains(needle
)) {
1133 index
= lines
.size() - 1;
1138 if (index
>= 0 && index
+ relativeLine
< lines
.size()) {
1139 rep
= lines
.get(index
+ relativeLine
);