1 package be
.nikiroo
.fanfix
.supported
;
3 import java
.awt
.image
.BufferedImage
;
4 import java
.io
.BufferedReader
;
5 import java
.io
.ByteArrayInputStream
;
7 import java
.io
.IOException
;
8 import java
.io
.InputStream
;
9 import java
.io
.InputStreamReader
;
10 import java
.net
.MalformedURLException
;
12 import java
.util
.ArrayList
;
13 import java
.util
.HashMap
;
14 import java
.util
.List
;
16 import java
.util
.Map
.Entry
;
17 import java
.util
.Scanner
;
19 import be
.nikiroo
.fanfix
.Instance
;
20 import be
.nikiroo
.fanfix
.bundles
.Config
;
21 import be
.nikiroo
.fanfix
.bundles
.StringId
;
22 import be
.nikiroo
.fanfix
.data
.Chapter
;
23 import be
.nikiroo
.fanfix
.data
.MetaData
;
24 import be
.nikiroo
.fanfix
.data
.Paragraph
;
25 import be
.nikiroo
.fanfix
.data
.Paragraph
.ParagraphType
;
26 import be
.nikiroo
.fanfix
.data
.Story
;
27 import be
.nikiroo
.utils
.IOUtils
;
28 import be
.nikiroo
.utils
.Progress
;
29 import be
.nikiroo
.utils
.StringUtils
;
32 * This class is the base class used by the other support classes. It can be
33 * used outside of this package, and has static methods that you can use to get
34 * access to the correct support class.
36 * It will be used with 'resources' (usually web pages or files).
40 public abstract class BasicSupport
{
42 * The supported input types for which we can get a {@link BasicSupport}
47 public enum SupportType
{
48 /** EPUB files created with this program */
50 /** Pure text file with some rules */
52 /** TEXT but with associated .info file */
54 /** My Little Pony fanfictions */
56 /** Fanfictions from a lot of different universes */
58 /** Website with lots of Mangas */
60 /** Furry website with comics support */
68 * A description of this support type (more information than the
69 * {@link BasicSupport#getSourceName()}).
71 * @return the description
73 public String
getDesc() {
74 String desc
= Instance
.getTrans().getStringX(StringId
.INPUT_DESC
,
78 desc
= Instance
.getTrans().getString(StringId
.INPUT_DESC
, this);
85 * The name of this support type (a short version).
89 public String
getSourceName() {
90 BasicSupport support
= BasicSupport
.getSupport(this);
91 if (support
!= null) {
92 return support
.getSourceName();
99 public String
toString() {
100 return super.toString().toLowerCase();
104 * Call {@link SupportType#valueOf(String.toUpperCase())}.
107 * the possible type name
109 * @return NULL or the type
111 public static SupportType
valueOfUC(String typeName
) {
112 return SupportType
.valueOf(typeName
== null ?
null : typeName
117 * Call {@link SupportType#valueOf(String.toUpperCase())} but return
118 * NULL for NULL instead of raising exception.
121 * the possible type name
123 * @return NULL or the type
125 public static SupportType
valueOfNullOkUC(String typeName
) {
126 if (typeName
== null) {
130 return SupportType
.valueOfUC(typeName
);
134 * Call {@link SupportType#valueOf(String.toUpperCase())} but return
135 * NULL in case of error instead of raising an exception.
138 * the possible type name
140 * @return NULL or the type
142 public static SupportType
valueOfAllOkUC(String typeName
) {
144 return SupportType
.valueOfUC(typeName
);
145 } catch (Exception e
) {
151 private InputStream in
;
152 private SupportType type
;
153 private URL currentReferer
; // with one 'r', as in 'HTTP'...
156 private char openQuote
= Instance
.getTrans().getChar(
157 StringId
.OPEN_SINGLE_QUOTE
);
158 private char closeQuote
= Instance
.getTrans().getChar(
159 StringId
.CLOSE_SINGLE_QUOTE
);
160 private char openDoubleQuote
= Instance
.getTrans().getChar(
161 StringId
.OPEN_DOUBLE_QUOTE
);
162 private char closeDoubleQuote
= Instance
.getTrans().getChar(
163 StringId
.CLOSE_DOUBLE_QUOTE
);
166 * The name of this support class.
170 protected abstract String
getSourceName();
173 * Check if the given resource is supported by this {@link BasicSupport}.
176 * the resource to check for
178 * @return TRUE if it is
180 protected abstract boolean supports(URL url
);
183 * Return TRUE if the support will return HTML encoded content values for
184 * the chapters content.
186 * @return TRUE for HTML
188 protected abstract boolean isHtml();
190 protected abstract MetaData
getMeta(URL source
, InputStream in
)
194 * Return the story description.
197 * the source of the story
199 * the input (the main resource)
201 * @return the description
203 * @throws IOException
204 * in case of I/O error
206 protected abstract String
getDesc(URL source
, InputStream in
)
210 * Return the list of chapters (name and resource).
213 * the source of the story
215 * the input (the main resource)
217 * @return the chapters
219 * @throws IOException
220 * in case of I/O error
222 protected abstract List
<Entry
<String
, URL
>> getChapters(URL source
,
223 InputStream in
) throws IOException
;
226 * Return the content of the chapter (possibly HTML encoded, if
227 * {@link BasicSupport#isHtml()} is TRUE).
230 * the source of the story
232 * the input (the main resource)
236 * @return the content
238 * @throws IOException
239 * in case of I/O error
241 protected abstract String
getChapterContent(URL source
, InputStream in
,
242 int number
) throws IOException
;
245 * Return the list of cookies (values included) that must be used to
246 * correctly fetch the resources.
248 * You are expected to call the super method implementation if you override
251 * @return the cookies
253 public Map
<String
, String
> getCookies() {
254 return new HashMap
<String
, String
>();
258 * Process the given story resource into a partially filled {@link Story}
259 * object containing the name and metadata, except for the description.
264 * @return the {@link Story}
266 * @throws IOException
267 * in case of I/O error
269 public Story
processMeta(URL url
) throws IOException
{
270 return processMeta(url
, true, false);
274 * Process the given story resource into a partially filled {@link Story}
275 * object containing the name and metadata.
281 * close "this" and "in" when done
283 * @return the {@link Story}
285 * @throws IOException
286 * in case of I/O error
288 protected Story
processMeta(URL url
, boolean close
, boolean getDesc
)
296 preprocess(url
, getInput());
298 Story story
= new Story();
299 MetaData meta
= getMeta(url
, getInput());
302 if (meta
!= null && meta
.getCover() == null) {
303 meta
.setCover(getDefaultCover(meta
.getSubject()));
307 String descChapterName
= Instance
.getTrans().getString(
308 StringId
.DESCRIPTION
);
309 story
.getMeta().setResume(
310 makeChapter(url
, 0, descChapterName
,
311 getDesc(url
, getInput())));
319 } catch (IOException e
) {
331 * Process the given story resource into a fully filled {@link Story}
337 * the optional progress reporter
339 * @return the {@link Story}
341 * @throws IOException
342 * in case of I/O error
344 public Story
process(URL url
, Progress pg
) throws IOException
{
348 pg
.setMinMax(0, 100);
351 setCurrentReferer(url
);
355 Story story
= processMeta(url
, false, true);
362 story
.setChapters(new ArrayList
<Chapter
>());
364 List
<Entry
<String
, URL
>> chapters
= getChapters(url
, getInput());
368 if (chapters
!= null) {
369 Progress pgChaps
= new Progress(0, chapters
.size());
370 pg
.addProgress(pgChaps
, 80);
372 for (Entry
<String
, URL
> chap
: chapters
) {
373 setCurrentReferer(chap
.getValue());
374 InputStream chapIn
= Instance
.getCache().open(
375 chap
.getValue(), this, true);
377 story
.getChapters().add(
378 makeChapter(url
, i
, chap
.getKey(),
379 getChapterContent(url
, chapIn
, i
)));
384 pgChaps
.setProgress(i
++);
395 } catch (IOException e
) {
403 currentReferer
= null;
412 public SupportType
getType() {
417 * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
418 * the current {@link URL} we work on.
420 * @return the referer
422 public URL
getCurrentReferer() {
423 return currentReferer
;
427 * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
428 * the current {@link URL} we work on.
430 * @param currentReferer
433 protected void setCurrentReferer(URL currentReferer
) {
434 this.currentReferer
= currentReferer
;
445 protected BasicSupport
setType(SupportType type
) {
451 * Prepare the support if needed before processing.
454 * the source of the story
456 * the input (the main resource)
458 * @throws IOException
461 protected void preprocess(URL source
, InputStream in
) throws IOException
{
465 * Now that we have processed the {@link Story}, close the resources if any.
467 * @throws IOException
470 protected void close() throws IOException
{
474 * Create a {@link Chapter} object from the given information, formatting
475 * the content as it should be.
482 * the chapter content
484 * @return the {@link Chapter}
486 * @throws IOException
487 * in case of I/O error
489 protected Chapter
makeChapter(URL source
, int number
, String name
,
490 String content
) throws IOException
{
491 // Chapter name: process it correctly, then remove the possible
492 // redundant "Chapter x: " in front of it
493 String chapterName
= processPara(name
).getContent().trim();
494 for (String lang
: Instance
.getConfig().getString(Config
.CHAPTER
)
496 String chapterWord
= Instance
.getConfig().getStringX(
497 Config
.CHAPTER
, lang
);
498 if (chapterName
.startsWith(chapterWord
)) {
499 chapterName
= chapterName
.substring(chapterWord
.length())
505 if (chapterName
.startsWith(Integer
.toString(number
))) {
506 chapterName
= chapterName
.substring(
507 Integer
.toString(number
).length()).trim();
510 if (chapterName
.startsWith(":")) {
511 chapterName
= chapterName
.substring(1).trim();
515 Chapter chap
= new Chapter(number
, chapterName
);
517 if (content
!= null) {
518 chap
.setParagraphs(makeParagraphs(source
, content
));
526 * Convert the given content into {@link Paragraph}s.
529 * the source URL of the story
531 * the textual content
533 * @return the {@link Paragraph}s
535 * @throws IOException
536 * in case of I/O error
538 protected List
<Paragraph
> makeParagraphs(URL source
, String content
)
541 // Special <HR> processing:
542 content
= content
.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
546 List
<Paragraph
> paras
= new ArrayList
<Paragraph
>();
547 InputStream in
= new ByteArrayInputStream(content
.getBytes("UTF-8"));
549 BufferedReader buff
= new BufferedReader(new InputStreamReader(in
,
552 for (String encodedLine
= buff
.readLine(); encodedLine
!= null; encodedLine
= buff
556 lines
= encodedLine
.split("(<p>|</p>|<br>|<br/>|\\n)");
558 lines
= new String
[] { encodedLine
};
561 for (String aline
: lines
) {
562 String line
= aline
.trim();
565 if (line
.startsWith("[") && line
.endsWith("]")) {
566 image
= getImageUrl(this, source
,
567 line
.substring(1, line
.length() - 1).trim());
571 paras
.add(new Paragraph(image
));
573 paras
.add(processPara(line
));
581 // Check quotes for "bad" format
582 List
<Paragraph
> newParas
= new ArrayList
<Paragraph
>();
583 for (Paragraph para
: paras
) {
584 newParas
.addAll(requotify(para
));
588 // Remove double blanks/brks
589 fixBlanksBreaks(paras
);
595 * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
596 * those {@link Paragraph}s.
598 * The resulting list will not contain a starting or trailing blank/break
599 * nor 2 blanks or breaks following each other.
602 * the list of {@link Paragraph}s to fix
604 protected void fixBlanksBreaks(List
<Paragraph
> paras
) {
605 boolean space
= false;
607 for (int i
= 0; i
< paras
.size(); i
++) {
608 Paragraph para
= paras
.get(i
);
609 boolean thisSpace
= para
.getType() == ParagraphType
.BLANK
;
610 boolean thisBrk
= para
.getType() == ParagraphType
.BREAK
;
612 if (i
> 0 && space
&& thisBrk
) {
615 } else if ((space
|| brk
) && (thisSpace
|| thisBrk
)) {
624 // Remove blank/brk at start
626 && (paras
.get(0).getType() == ParagraphType
.BLANK
|| paras
.get(
627 0).getType() == ParagraphType
.BREAK
)) {
631 // Remove blank/brk at end
632 int last
= paras
.size() - 1;
634 && (paras
.get(last
).getType() == ParagraphType
.BLANK
|| paras
635 .get(last
).getType() == ParagraphType
.BREAK
)) {
641 * Get the default cover related to this subject (see <tt>.info</tt> files).
646 * @return the cover if any, or NULL
648 static BufferedImage
getDefaultCover(String subject
) {
649 if (subject
!= null && !subject
.isEmpty()
650 && Instance
.getCoverDir() != null) {
652 File fileCover
= new File(Instance
.getCoverDir(), subject
);
653 return getImage(null, fileCover
.toURI().toURL(), subject
);
654 } catch (MalformedURLException e
) {
662 * Return the list of supported image extensions.
664 * @return the extensions
666 static String
[] getImageExt(boolean emptyAllowed
) {
668 return new String
[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
670 return new String
[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
674 static BufferedImage
getImage(BasicSupport support
, URL source
, String line
) {
675 URL url
= getImageUrl(support
, source
, line
);
677 InputStream in
= null;
679 in
= Instance
.getCache().open(url
, getSupport(url
), true);
680 return IOUtils
.toImage(in
);
681 } catch (IOException e
) {
686 } catch (IOException e
) {
696 * Check if the given resource can be a local image or a remote image, then
697 * refresh the cache with it if it is.
702 * the resource to check
704 * @return the image URL if found, or NULL
707 static URL
getImageUrl(BasicSupport support
, URL source
, String line
) {
713 if (source
!= null) {
714 path
= new File(source
.getFile()).getParent();
716 String basePath
= new File(new File(path
), line
.trim())
718 for (String ext
: getImageExt(true)) {
719 if (new File(basePath
+ ext
).exists()) {
720 url
= new File(basePath
+ ext
).toURI().toURL();
723 } catch (Exception e
) {
724 // Nothing to do here
731 for (String ext
: getImageExt(true)) {
732 if (Instance
.getCache().check(new URL(line
+ ext
))) {
733 url
= new URL(line
+ ext
);
740 for (String ext
: getImageExt(true)) {
742 url
= new URL(line
+ ext
);
743 Instance
.getCache().refresh(url
, support
, true);
745 } catch (IOException e
) {
746 // no image with this ext
751 } catch (MalformedURLException e
) {
756 // refresh the cached file
759 Instance
.getCache().refresh(url
, support
, true);
760 } catch (IOException e
) {
761 // woops, broken image
771 * Open the input file that will be used through the support.
774 * the source {@link URL}
776 * @return the {@link InputStream}
778 * @throws IOException
779 * in case of I/O error
781 protected InputStream
openInput(URL source
) throws IOException
{
782 return Instance
.getCache().open(source
, this, false);
785 protected InputStream
reset(InputStream in
) {
788 } catch (IOException e
) {
794 * Reset then return {@link BasicSupport#in}.
796 * @return {@link BasicSupport#in}
798 protected InputStream
getInput() {
803 * Fix the author name if it is prefixed with some "by" {@link String}.
806 * the author with a possible prefix
808 * @return the author without prefixes
810 protected String
fixAuthor(String author
) {
811 if (author
!= null) {
812 for (String suffix
: new String
[] { " ", ":" }) {
813 for (String byString
: Instance
.getConfig()
814 .getString(Config
.BYS
).split(",")) {
816 if (author
.toUpperCase().startsWith(byString
.toUpperCase())) {
817 author
= author
.substring(byString
.length()).trim();
822 // Special case (without suffix):
823 if (author
.startsWith("©")) {
824 author
= author
.substring(1);
832 * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
833 * and requotify them (i.e., separate them into QUOTE paragraphs and other
834 * paragraphs (quotes or not)).
837 * the paragraph to requotify (not necessarily a quote)
839 * @return the correctly (or so we hope) quotified paragraphs
841 protected List
<Paragraph
> requotify(Paragraph para
) {
842 List
<Paragraph
> newParas
= new ArrayList
<Paragraph
>();
844 if (para
.getType() == ParagraphType
.QUOTE
845 && para
.getContent().length() > 2) {
846 String line
= para
.getContent();
847 boolean singleQ
= line
.startsWith("" + openQuote
);
848 boolean doubleQ
= line
.startsWith("" + openDoubleQuote
);
850 // Do not try when more than one quote at a time
851 // (some stories are not easily readable if we do)
853 && line
.indexOf(closeQuote
, 1) < line
854 .lastIndexOf(closeQuote
)) {
859 && line
.indexOf(closeDoubleQuote
, 1) < line
860 .lastIndexOf(closeDoubleQuote
)) {
866 if (!singleQ
&& !doubleQ
) {
867 line
= openDoubleQuote
+ line
+ closeDoubleQuote
;
868 newParas
.add(new Paragraph(ParagraphType
.QUOTE
, line
));
870 char open
= singleQ ? openQuote
: openDoubleQuote
;
871 char close
= singleQ ? closeQuote
: closeDoubleQuote
;
874 boolean inQuote
= false;
876 for (char car
: line
.toCharArray()) {
879 } else if (car
== close
) {
881 } else if (car
== '.' && !inQuote
) {
889 String rest
= line
.substring(posDot
+ 1).trim();
890 line
= line
.substring(0, posDot
+ 1).trim();
891 newParas
.add(new Paragraph(ParagraphType
.QUOTE
, line
));
892 if (!rest
.isEmpty()) {
893 newParas
.addAll(requotify(processPara(rest
)));
907 * Process a {@link Paragraph} from a raw line of text.
909 * Will also fix quotes and HTML encoding if needed.
914 * @return the processed {@link Paragraph}
916 private Paragraph
processPara(String line
) {
917 line
= ifUnhtml(line
).trim();
919 boolean space
= true;
921 boolean quote
= false;
922 boolean tentativeCloseQuote
= false;
926 StringBuilder builder
= new StringBuilder();
927 for (char car
: line
.toCharArray()) {
930 // dash, ndash and mdash: - – —
931 // currently: always use mdash
932 builder
.append(dashCount
== 1 ?
'-' : '—');
937 if (tentativeCloseQuote
) {
938 tentativeCloseQuote
= false;
939 if ((car
>= 'a' && car
<= 'z') || (car
>= 'A' && car
<= 'Z')
940 || (car
>= '0' && car
<= '9')) {
943 builder
.append(closeQuote
);
948 case ' ': // note: unbreakable space
951 case '\n': // just in case
952 case '\r': // just in case
957 if (space
|| (brk
&& quote
)) {
959 builder
.append(openQuote
);
960 } else if (prev
== ' ') {
961 builder
.append(openQuote
);
963 // it is a quote ("I'm off") or a 'quote' ("This
964 // 'good' restaurant"...)
965 tentativeCloseQuote
= true;
970 if (space
|| (brk
&& quote
)) {
972 builder
.append(openDoubleQuote
);
973 } else if (prev
== ' ') {
974 builder
.append(openDoubleQuote
);
976 builder
.append(closeDoubleQuote
);
1001 builder
.append(car
);
1010 if (space
|| (brk
&& quote
)) {
1012 builder
.append(openQuote
);
1014 builder
.append(openQuote
);
1027 builder
.append(closeQuote
);
1035 if (space
|| (brk
&& quote
)) {
1037 builder
.append(openDoubleQuote
);
1039 builder
.append(openDoubleQuote
);
1052 builder
.append(closeDoubleQuote
);
1058 builder
.append(car
);
1065 if (tentativeCloseQuote
) {
1066 tentativeCloseQuote
= false;
1067 builder
.append(closeQuote
);
1070 line
= builder
.toString().trim();
1072 ParagraphType type
= ParagraphType
.NORMAL
;
1074 type
= ParagraphType
.BLANK
;
1076 type
= ParagraphType
.BREAK
;
1078 type
= ParagraphType
.QUOTE
;
1081 return new Paragraph(type
, line
);
1085 * Remove the HTML from the input <b>if</b> {@link BasicSupport#isHtml()} is
1091 * @return the no html version if needed
1093 private String
ifUnhtml(String input
) {
1094 if (isHtml() && input
!= null) {
1095 return StringUtils
.unhtml(input
);
1102 * Return a {@link BasicSupport} implementation supporting the given
1103 * resource if possible.
1106 * the story resource
1108 * @return an implementation that supports it, or NULL
1110 public static BasicSupport
getSupport(URL url
) {
1115 // TEXT and INFO_TEXT always support files (not URLs though)
1116 for (SupportType type
: SupportType
.values()) {
1117 if (type
!= SupportType
.TEXT
&& type
!= SupportType
.INFO_TEXT
) {
1118 BasicSupport support
= getSupport(type
);
1119 if (support
!= null && support
.supports(url
)) {
1125 for (SupportType type
: new SupportType
[] { SupportType
.INFO_TEXT
,
1126 SupportType
.TEXT
}) {
1127 BasicSupport support
= getSupport(type
);
1128 if (support
!= null && support
.supports(url
)) {
1137 * Return a {@link BasicSupport} implementation supporting the given type.
1142 * @return an implementation that supports it, or NULL
1144 public static BasicSupport
getSupport(SupportType type
) {
1147 return new Epub().setType(type
);
1149 return new InfoText().setType(type
);
1151 return new Fimfiction().setType(type
);
1153 return new Fanfiction().setType(type
);
1155 return new Text().setType(type
);
1157 return new MangaFox().setType(type
);
1159 return new E621().setType(type
);
1161 return new Cbz().setType(type
);
1163 return new Html().setType(type
);
1170 * Return the first line from the given input which corresponds to the given
1176 * a string that must be found inside the target line (also
1177 * supports "^" at start to say "only if it starts with" the
1179 * @param relativeLine
1180 * the line to return based upon the target line position (-1 =
1181 * the line before, 0 = the target line...)
1185 static String
getLine(InputStream in
, String needle
, int relativeLine
) {
1186 return getLine(in
, needle
, relativeLine
, true);
1190 * Return a line from the given input which corresponds to the given
1196 * a string that must be found inside the target line (also
1197 * supports "^" at start to say "only if it starts with" the
1199 * @param relativeLine
1200 * the line to return based upon the target line position (-1 =
1201 * the line before, 0 = the target line...)
1203 * takes the first result (as opposed to the last one, which will
1204 * also always spend the input)
1208 static String
getLine(InputStream in
, String needle
, int relativeLine
,
1214 } catch (IOException e
) {
1218 List
<String
> lines
= new ArrayList
<String
>();
1219 @SuppressWarnings("resource")
1220 Scanner scan
= new Scanner(in
, "UTF-8");
1222 scan
.useDelimiter("\\n");
1223 while (scan
.hasNext()) {
1224 lines
.add(scan
.next());
1227 if (needle
.startsWith("^")) {
1228 if (lines
.get(lines
.size() - 1).startsWith(
1229 needle
.substring(1))) {
1230 index
= lines
.size() - 1;
1234 if (lines
.get(lines
.size() - 1).contains(needle
)) {
1235 index
= lines
.size() - 1;
1240 if (index
>= 0 && index
+ relativeLine
< lines
.size()) {
1241 rep
= lines
.get(index
+ relativeLine
);