1 package be
.nikiroo
.fanfix
.supported
;
3 import java
.io
.BufferedReader
;
4 import java
.io
.ByteArrayInputStream
;
6 import java
.io
.IOException
;
7 import java
.io
.InputStream
;
8 import java
.io
.InputStreamReader
;
9 import java
.net
.MalformedURLException
;
11 import java
.util
.ArrayList
;
12 import java
.util
.Date
;
13 import java
.util
.HashMap
;
14 import java
.util
.List
;
16 import java
.util
.Map
.Entry
;
17 import java
.util
.Scanner
;
19 import be
.nikiroo
.fanfix
.Instance
;
20 import be
.nikiroo
.fanfix
.bundles
.Config
;
21 import be
.nikiroo
.fanfix
.bundles
.StringId
;
22 import be
.nikiroo
.fanfix
.data
.Chapter
;
23 import be
.nikiroo
.fanfix
.data
.MetaData
;
24 import be
.nikiroo
.fanfix
.data
.Paragraph
;
25 import be
.nikiroo
.fanfix
.data
.Paragraph
.ParagraphType
;
26 import be
.nikiroo
.fanfix
.data
.Story
;
27 import be
.nikiroo
.utils
.Image
;
28 import be
.nikiroo
.utils
.Progress
;
29 import be
.nikiroo
.utils
.StringUtils
;
32 * This class is the base class used by the other support classes. It can be
33 * used outside of this package, and has static methods that you can use to get
34 * access to the correct support class.
36 * It will be used with 'resources' (usually web pages or files).
40 public abstract class BasicSupport
{
42 * The supported input types for which we can get a {@link BasicSupport}
47 public enum SupportType
{
48 /** EPUB files created with this program */
50 /** Pure text file with some rules */
52 /** TEXT but with associated .info file */
54 /** My Little Pony fanfictions */
56 /** Fanfictions from a lot of different universes */
58 /** Website with lots of Mangas */
60 /** Furry website with comics support */
62 /** Furry website with stories */
64 /** Comics and images groups, mostly but not only NSFW */
72 * A description of this support type (more information than the
73 * {@link BasicSupport#getSourceName()}).
75 * @return the description
77 public String
getDesc() {
78 String desc
= Instance
.getTrans().getStringX(StringId
.INPUT_DESC
,
82 desc
= Instance
.getTrans().getString(StringId
.INPUT_DESC
, this);
89 * The name of this support type (a short version).
93 public String
getSourceName() {
94 BasicSupport support
= BasicSupport
.getSupport(this);
95 if (support
!= null) {
96 return support
.getSourceName();
103 public String
toString() {
104 return super.toString().toLowerCase();
108 * Call {@link SupportType#valueOf(String)} after conversion to upper
112 * the possible type name
114 * @return NULL or the type
116 public static SupportType
valueOfUC(String typeName
) {
117 return SupportType
.valueOf(typeName
== null ?
null : typeName
122 * Call {@link SupportType#valueOf(String)} after conversion to upper
123 * case but return NULL for NULL instead of raising exception.
126 * the possible type name
128 * @return NULL or the type
130 public static SupportType
valueOfNullOkUC(String typeName
) {
131 if (typeName
== null) {
135 return SupportType
.valueOfUC(typeName
);
139 * Call {@link SupportType#valueOf(String)} after conversion to upper
140 * case but return NULL in case of error instead of raising an
144 * the possible type name
146 * @return NULL or the type
148 public static SupportType
valueOfAllOkUC(String typeName
) {
150 return SupportType
.valueOfUC(typeName
);
151 } catch (Exception e
) {
157 private InputStream in
;
158 private SupportType type
;
159 private URL currentReferer
; // with only one 'r', as in 'HTTP'...
162 private char openQuote
= Instance
.getTrans().getCharacter(
163 StringId
.OPEN_SINGLE_QUOTE
);
164 private char closeQuote
= Instance
.getTrans().getCharacter(
165 StringId
.CLOSE_SINGLE_QUOTE
);
166 private char openDoubleQuote
= Instance
.getTrans().getCharacter(
167 StringId
.OPEN_DOUBLE_QUOTE
);
168 private char closeDoubleQuote
= Instance
.getTrans().getCharacter(
169 StringId
.CLOSE_DOUBLE_QUOTE
);
172 * The name of this support class.
176 protected abstract String
getSourceName();
179 * Check if the given resource is supported by this {@link BasicSupport}.
182 * the resource to check for
184 * @return TRUE if it is
186 protected abstract boolean supports(URL url
);
189 * Return TRUE if the support will return HTML encoded content values for
190 * the chapters content.
192 * @return TRUE for HTML
194 protected abstract boolean isHtml();
197 * Return the {@link MetaData} of this story.
200 * the source of the story
202 * the input (the main resource)
204 * @return the associated {@link MetaData}, never NULL
206 * @throws IOException
207 * in case of I/O error
209 protected abstract MetaData
getMeta(URL source
, InputStream in
)
213 * Return the story description.
216 * the source of the story
218 * the input (the main resource)
220 * @return the description
222 * @throws IOException
223 * in case of I/O error
225 protected abstract String
getDesc(URL source
, InputStream in
)
229 * Return the list of chapters (name and resource).
232 * the source of the story
234 * the input (the main resource)
236 * the optional progress reporter
238 * @return the chapters
240 * @throws IOException
241 * in case of I/O error
243 protected abstract List
<Entry
<String
, URL
>> getChapters(URL source
,
244 InputStream in
, Progress pg
) throws IOException
;
247 * Return the content of the chapter (possibly HTML encoded, if
248 * {@link BasicSupport#isHtml()} is TRUE).
251 * the source of the story
253 * the input (the main resource)
257 * the optional progress reporter
259 * @return the content
261 * @throws IOException
262 * in case of I/O error
264 protected abstract String
getChapterContent(URL source
, InputStream in
,
265 int number
, Progress pg
) throws IOException
;
268 * Log into the support (can be a no-op depending upon the support).
270 * @throws IOException
271 * in case of I/O error
273 @SuppressWarnings("unused")
274 public void login() throws IOException
{
278 * Return the list of cookies (values included) that must be used to
279 * correctly fetch the resources.
281 * You are expected to call the super method implementation if you override
284 * @return the cookies
286 public Map
<String
, String
> getCookies() {
287 return new HashMap
<String
, String
>();
291 * OAuth authorisation (aka, "bearer XXXXXXX").
293 * @return the OAuth string
295 public String
getOAuth() {
300 * Return the canonical form of the main {@link URL}.
303 * the source {@link URL}
305 * @return the canonical form of this {@link URL}
307 * @throws IOException
308 * in case of I/O error
310 @SuppressWarnings("unused")
311 public URL
getCanonicalUrl(URL source
) throws IOException
{
316 * Process the given story resource into a partially filled {@link Story}
317 * object containing the name and metadata, except for the description.
322 * @return the {@link Story}
324 * @throws IOException
325 * in case of I/O error
327 public Story
processMeta(URL url
) throws IOException
{
328 return processMeta(url
, true, false, null);
332 * Process the given story resource into a partially filled {@link Story}
333 * object containing the name and metadata.
338 * close "this" and "in" when done
340 * retrieve the description of the story, or not
342 * the optional progress reporter
344 * @return the {@link Story}, never NULL
346 * @throws IOException
347 * in case of I/O error
349 protected Story
processMeta(URL url
, boolean close
, boolean getDesc
,
350 Progress pg
) throws IOException
{
354 pg
.setMinMax(0, 100);
360 url
= getCanonicalUrl(url
);
362 setCurrentReferer(url
);
364 in
= openInput(url
); // NULL allowed here
366 preprocess(url
, getInput());
369 Story story
= new Story();
370 MetaData meta
= getMeta(url
, getInput());
371 if (meta
.getCreationDate() == null
372 || meta
.getCreationDate().isEmpty()) {
373 meta
.setCreationDate(StringUtils
.fromTime(new Date().getTime()));
379 if (meta
.getCover() == null) {
380 meta
.setCover(getDefaultCover(meta
.getSubject()));
386 String descChapterName
= Instance
.getTrans().getString(
387 StringId
.DESCRIPTION
);
388 story
.getMeta().setResume(
389 makeChapter(url
, 0, descChapterName
,
390 getDesc(url
, getInput()), null));
399 } catch (IOException e
) {
400 Instance
.getTraceHandler().error(e
);
408 setCurrentReferer(null);
413 * Process the given story resource into a fully filled {@link Story}
419 * the optional progress reporter
421 * @return the {@link Story}, never NULL
423 * @throws IOException
424 * in case of I/O error
426 public Story
process(URL url
, Progress pg
) throws IOException
{
430 pg
.setMinMax(0, 100);
433 url
= getCanonicalUrl(url
);
436 Progress pgMeta
= new Progress();
437 pg
.addProgress(pgMeta
, 10);
438 Story story
= processMeta(url
, false, true, pgMeta
);
439 if (!pgMeta
.isDone()) {
440 pgMeta
.setProgress(pgMeta
.getMax()); // 10%
443 pg
.setName("Retrieving " + story
.getMeta().getTitle());
445 setCurrentReferer(url
);
447 Progress pgGetChapters
= new Progress();
448 pg
.addProgress(pgGetChapters
, 10);
449 story
.setChapters(new ArrayList
<Chapter
>());
450 List
<Entry
<String
, URL
>> chapters
= getChapters(url
, getInput(),
452 if (!pgGetChapters
.isDone()) {
453 pgGetChapters
.setProgress(pgGetChapters
.getMax()); // 20%
456 if (chapters
!= null) {
457 Progress pgChaps
= new Progress("Extracting chapters", 0,
458 chapters
.size() * 300);
459 pg
.addProgress(pgChaps
, 80);
463 for (Entry
<String
, URL
> chap
: chapters
) {
464 pgChaps
.setName("Extracting chapter " + i
);
465 InputStream chapIn
= null;
466 if (chap
.getValue() != null) {
467 setCurrentReferer(chap
.getValue());
468 chapIn
= Instance
.getCache().open(chap
.getValue(),
471 pgChaps
.setProgress(i
* 100);
473 Progress pgGetChapterContent
= new Progress();
474 Progress pgMakeChapter
= new Progress();
475 pgChaps
.addProgress(pgGetChapterContent
, 100);
476 pgChaps
.addProgress(pgMakeChapter
, 100);
478 String content
= getChapterContent(url
, chapIn
, i
,
479 pgGetChapterContent
);
480 if (!pgGetChapterContent
.isDone()) {
481 pgGetChapterContent
.setProgress(pgGetChapterContent
485 Chapter cc
= makeChapter(url
, i
, chap
.getKey(),
486 content
, pgMakeChapter
);
487 if (!pgMakeChapter
.isDone()) {
488 pgMakeChapter
.setProgress(pgMakeChapter
.getMax());
491 words
+= cc
.getWords();
492 story
.getChapters().add(cc
);
493 story
.getMeta().setWords(words
);
495 if (chapIn
!= null) {
503 pgChaps
.setName("Extracting chapters");
513 } catch (IOException e
) {
514 Instance
.getTraceHandler().error(e
);
521 setCurrentReferer(null);
530 public SupportType
getType() {
535 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
536 * the current {@link URL} we work on.
538 * @return the referer
540 public URL
getCurrentReferer() {
541 return currentReferer
;
545 * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
546 * the current {@link URL} we work on.
548 * @param currentReferer
551 protected void setCurrentReferer(URL currentReferer
) {
552 this.currentReferer
= currentReferer
;
563 protected BasicSupport
setType(SupportType type
) {
569 * Prepare the support if needed before processing.
572 * the source of the story
574 * the input (the main resource)
576 * @throws IOException
579 @SuppressWarnings("unused")
580 protected void preprocess(URL source
, InputStream in
) throws IOException
{
584 * Now that we have processed the {@link Story}, close the resources if any.
586 * @throws IOException
589 @SuppressWarnings("unused")
590 protected void close() throws IOException
{
594 * Create a {@link Chapter} object from the given information, formatting
595 * the content as it should be.
598 * the source of the story
604 * the chapter content
606 * the optional progress reporter
608 * @return the {@link Chapter}
610 * @throws IOException
611 * in case of I/O error
613 protected Chapter
makeChapter(URL source
, int number
, String name
,
614 String content
, Progress pg
) throws IOException
{
615 // Chapter name: process it correctly, then remove the possible
616 // redundant "Chapter x: " in front of it, or "-" (as in
617 // "Chapter 5: - Fun!" after the ": " was automatically added)
618 String chapterName
= processPara(name
).getContent().trim();
619 for (String lang
: Instance
.getConfig().getString(Config
.CHAPTER
)
621 String chapterWord
= Instance
.getConfig().getStringX(
622 Config
.CHAPTER
, lang
);
623 if (chapterName
.startsWith(chapterWord
)) {
624 chapterName
= chapterName
.substring(chapterWord
.length())
630 if (chapterName
.startsWith(Integer
.toString(number
))) {
631 chapterName
= chapterName
.substring(
632 Integer
.toString(number
).length()).trim();
635 while (chapterName
.startsWith(":") || chapterName
.startsWith("-")) {
636 chapterName
= chapterName
.substring(1).trim();
640 Chapter chap
= new Chapter(number
, chapterName
);
642 if (content
!= null) {
643 List
<Paragraph
> paras
= makeParagraphs(source
, content
, pg
);
645 for (Paragraph para
: paras
) {
646 words
+= para
.getWords();
648 chap
.setParagraphs(paras
);
649 chap
.setWords(words
);
657 * Convert the given content into {@link Paragraph}s.
660 * the source URL of the story
662 * the textual content
664 * the optional progress reporter
666 * @return the {@link Paragraph}s
668 * @throws IOException
669 * in case of I/O error
671 protected List
<Paragraph
> makeParagraphs(URL source
, String content
,
672 Progress pg
) throws IOException
{
678 // Special <HR> processing:
679 content
= content
.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
683 List
<Paragraph
> paras
= new ArrayList
<Paragraph
>();
685 if (content
!= null && !content
.trim().isEmpty()) {
687 String
[] tab
= content
.split("(<p>|</p>|<br>|<br/>)");
688 pg
.setMinMax(0, tab
.length
);
690 for (String line
: tab
) {
691 if (line
.startsWith("[") && line
.endsWith("]")) {
692 pg
.setName("Extracting image " + i
);
694 paras
.add(makeParagraph(source
, line
.trim()));
699 List
<String
> lines
= new ArrayList
<String
>();
700 BufferedReader buff
= null;
702 buff
= new BufferedReader(
703 new InputStreamReader(new ByteArrayInputStream(
704 content
.getBytes("UTF-8")), "UTF-8"));
705 for (String line
= buff
.readLine(); line
!= null; line
= buff
707 lines
.add(line
.trim());
715 pg
.setMinMax(0, lines
.size());
717 for (String line
: lines
) {
718 if (line
.startsWith("[") && line
.endsWith("]")) {
719 pg
.setName("Extracting image " + i
);
721 paras
.add(makeParagraph(source
, line
));
727 // Check quotes for "bad" format
728 List
<Paragraph
> newParas
= new ArrayList
<Paragraph
>();
729 for (Paragraph para
: paras
) {
730 newParas
.addAll(requotify(para
));
734 // Remove double blanks/brks
735 fixBlanksBreaks(paras
);
742 * Convert the given line into a single {@link Paragraph}.
745 * the source URL of the story
747 * the textual content of the paragraph
749 * @return the {@link Paragraph}
751 private Paragraph
makeParagraph(URL source
, String line
) {
753 if (line
.startsWith("[") && line
.endsWith("]")) {
754 image
= getImage(this, source
, line
.substring(1, line
.length() - 1)
759 return new Paragraph(image
);
762 return processPara(line
);
766 * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
767 * those {@link Paragraph}s.
769 * The resulting list will not contain a starting or trailing blank/break
770 * nor 2 blanks or breaks following each other.
773 * the list of {@link Paragraph}s to fix
775 protected void fixBlanksBreaks(List
<Paragraph
> paras
) {
776 boolean space
= false;
778 for (int i
= 0; i
< paras
.size(); i
++) {
779 Paragraph para
= paras
.get(i
);
780 boolean thisSpace
= para
.getType() == ParagraphType
.BLANK
;
781 boolean thisBrk
= para
.getType() == ParagraphType
.BREAK
;
783 if (i
> 0 && space
&& thisBrk
) {
786 } else if ((space
|| brk
) && (thisSpace
|| thisBrk
)) {
795 // Remove blank/brk at start
797 && (paras
.get(0).getType() == ParagraphType
.BLANK
|| paras
.get(
798 0).getType() == ParagraphType
.BREAK
)) {
802 // Remove blank/brk at end
803 int last
= paras
.size() - 1;
805 && (paras
.get(last
).getType() == ParagraphType
.BLANK
|| paras
806 .get(last
).getType() == ParagraphType
.BREAK
)) {
812 * Get the default cover related to this subject (see <tt>.info</tt> files).
817 * @return the cover if any, or NULL
819 static Image
getDefaultCover(String subject
) {
820 if (subject
!= null && !subject
.isEmpty()
821 && Instance
.getCoverDir() != null) {
823 File fileCover
= new File(Instance
.getCoverDir(), subject
);
824 return getImage(null, fileCover
.toURI().toURL(), subject
);
825 } catch (MalformedURLException e
) {
833 * Return the list of supported image extensions.
835 * @param emptyAllowed
836 * TRUE to allow an empty extension on first place, which can be
837 * used when you may already have an extension in your input but
838 * are not sure about it
840 * @return the extensions
842 static String
[] getImageExt(boolean emptyAllowed
) {
844 return new String
[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
847 return new String
[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
851 * Check if the given resource can be a local image or a remote image, then
852 * refresh the cache with it if it is.
857 * the resource to check
859 * @return the image if found, or NULL
862 static Image
getImage(BasicSupport support
, URL source
, String line
) {
863 URL url
= getImageUrl(support
, source
, line
);
865 if ("file".equals(url
.getProtocol())) {
866 if (new File(url
.getPath()).isDirectory()) {
870 InputStream in
= null;
872 in
= Instance
.getCache().open(url
, getSupport(url
), true);
873 return new Image(in
);
874 } catch (IOException e
) {
879 } catch (IOException e
) {
889 * Check if the given resource can be a local image or a remote image, then
890 * refresh the cache with it if it is.
895 * the resource to check
897 * @return the image URL if found, or NULL
900 static URL
getImageUrl(BasicSupport support
, URL source
, String line
) {
905 if (source
!= null) {
908 String relPath
= null;
909 String absPath
= null;
911 String path
= new File(source
.getFile()).getParent();
912 relPath
= new File(new File(path
), line
.trim())
914 } catch (Exception e
) {
915 // Cannot be converted to path (one possibility to take
916 // into account: absolute path on Windows)
919 absPath
= new File(line
.trim()).getAbsolutePath();
920 } catch (Exception e
) {
921 // Cannot be converted to path (at all)
924 for (String ext
: getImageExt(true)) {
925 File absFile
= new File(absPath
+ ext
);
926 File relFile
= new File(relPath
+ ext
);
927 if (absPath
!= null && absFile
.exists()
928 && absFile
.isFile()) {
929 url
= absFile
.toURI().toURL();
930 } else if (relPath
!= null && relFile
.exists()
931 && relFile
.isFile()) {
932 url
= relFile
.toURI().toURL();
935 } catch (Exception e
) {
936 // Should not happen since we control the correct arguments
943 for (String ext
: getImageExt(true)) {
944 if (Instance
.getCache()
945 .check(new URL(line
+ ext
), true)) {
946 url
= new URL(line
+ ext
);
953 for (String ext
: getImageExt(true)) {
955 url
= new URL(line
+ ext
);
956 Instance
.getCache().refresh(url
, support
, true);
958 } catch (IOException e
) {
959 // no image with this ext
964 } catch (MalformedURLException e
) {
969 // refresh the cached file
972 Instance
.getCache().refresh(url
, support
, true);
973 } catch (IOException e
) {
974 // woops, broken image
984 * Open the input file that will be used through the support.
986 * Can return NULL, in which case you are supposed to work without an
987 * {@link InputStream}.
990 * the source {@link URL}
992 * @return the {@link InputStream}
994 * @throws IOException
995 * in case of I/O error
997 protected InputStream
openInput(URL source
) throws IOException
{
998 return Instance
.getCache().open(source
, this, false);
1002 * Reset then return {@link BasicSupport#in}.
1004 * @return {@link BasicSupport#in}
1006 protected InputStream
getInput() {
1011 * Fix the author name if it is prefixed with some "by" {@link String}.
1014 * the author with a possible prefix
1016 * @return the author without prefixes
1018 protected String
fixAuthor(String author
) {
1019 if (author
!= null) {
1020 for (String suffix
: new String
[] { " ", ":" }) {
1021 for (String byString
: Instance
.getConfig()
1022 .getString(Config
.BYS
).split(",")) {
1024 if (author
.toUpperCase().startsWith(byString
.toUpperCase())) {
1025 author
= author
.substring(byString
.length()).trim();
1030 // Special case (without suffix):
1031 if (author
.startsWith("©")) {
1032 author
= author
.substring(1);
1040 * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
1041 * and requotify them (i.e., separate them into QUOTE paragraphs and other
1042 * paragraphs (quotes or not)).
1045 * the paragraph to requotify (not necessarily a quote)
1047 * @return the correctly (or so we hope) quotified paragraphs
1049 protected List
<Paragraph
> requotify(Paragraph para
) {
1050 List
<Paragraph
> newParas
= new ArrayList
<Paragraph
>();
1052 if (para
.getType() == ParagraphType
.QUOTE
1053 && para
.getContent().length() > 2) {
1054 String line
= para
.getContent();
1055 boolean singleQ
= line
.startsWith("" + openQuote
);
1056 boolean doubleQ
= line
.startsWith("" + openDoubleQuote
);
1058 // Do not try when more than one quote at a time
1059 // (some stories are not easily readable if we do)
1061 && line
.indexOf(closeQuote
, 1) < line
1062 .lastIndexOf(closeQuote
)) {
1067 && line
.indexOf(closeDoubleQuote
, 1) < line
1068 .lastIndexOf(closeDoubleQuote
)) {
1074 if (!singleQ
&& !doubleQ
) {
1075 line
= openDoubleQuote
+ line
+ closeDoubleQuote
;
1076 newParas
.add(new Paragraph(ParagraphType
.QUOTE
, line
, para
1079 char open
= singleQ ? openQuote
: openDoubleQuote
;
1080 char close
= singleQ ? closeQuote
: closeDoubleQuote
;
1083 boolean inQuote
= false;
1085 for (char car
: line
.toCharArray()) {
1088 } else if (car
== close
) {
1090 } else if (car
== '.' && !inQuote
) {
1098 String rest
= line
.substring(posDot
+ 1).trim();
1099 line
= line
.substring(0, posDot
+ 1).trim();
1101 for (char car
: line
.toCharArray()) {
1106 newParas
.add(new Paragraph(ParagraphType
.QUOTE
, line
, words
));
1107 if (!rest
.isEmpty()) {
1108 newParas
.addAll(requotify(processPara(rest
)));
1122 * Process a {@link Paragraph} from a raw line of text.
1124 * Will also fix quotes and HTML encoding if needed.
1129 * @return the processed {@link Paragraph}
1131 protected Paragraph
processPara(String line
) {
1132 line
= ifUnhtml(line
).trim();
1134 boolean space
= true;
1136 boolean quote
= false;
1137 boolean tentativeCloseQuote
= false;
1142 StringBuilder builder
= new StringBuilder();
1143 for (char car
: line
.toCharArray()) {
1145 if (dashCount
> 0) {
1146 // dash, ndash and mdash: - – —
1147 // currently: always use mdash
1148 builder
.append(dashCount
== 1 ?
'-' : '—');
1153 if (tentativeCloseQuote
) {
1154 tentativeCloseQuote
= false;
1155 if (Character
.isLetterOrDigit(car
)) {
1156 builder
.append("'");
1158 // handle double-single quotes as double quotes
1160 builder
.append(closeDoubleQuote
);
1164 builder
.append(closeQuote
);
1169 case ' ': // note: unbreakable space
1172 case '\n': // just in case
1173 case '\r': // just in case
1174 if (builder
.length() > 0
1175 && builder
.charAt(builder
.length() - 1) != ' ') {
1178 builder
.append(' ');
1182 if (space
|| (brk
&& quote
)) {
1184 // handle double-single quotes as double quotes
1186 builder
.deleteCharAt(builder
.length() - 1);
1187 builder
.append(openDoubleQuote
);
1189 builder
.append(openQuote
);
1191 } else if (prev
== ' ' || prev
== car
) {
1192 // handle double-single quotes as double quotes
1194 builder
.deleteCharAt(builder
.length() - 1);
1195 builder
.append(openDoubleQuote
);
1197 builder
.append(openQuote
);
1200 // it is a quote ("I'm off") or a 'quote' ("This
1201 // 'good' restaurant"...)
1202 tentativeCloseQuote
= true;
1207 if (space
|| (brk
&& quote
)) {
1209 builder
.append(openDoubleQuote
);
1210 } else if (prev
== ' ') {
1211 builder
.append(openDoubleQuote
);
1213 builder
.append(closeDoubleQuote
);
1238 builder
.append(car
);
1247 if (space
|| (brk
&& quote
)) {
1249 builder
.append(openQuote
);
1251 // handle double-single quotes as double quotes
1253 builder
.deleteCharAt(builder
.length() - 1);
1254 builder
.append(openDoubleQuote
);
1256 builder
.append(openQuote
);
1270 // handle double-single quotes as double quotes
1272 builder
.deleteCharAt(builder
.length() - 1);
1273 builder
.append(closeDoubleQuote
);
1275 builder
.append(closeQuote
);
1284 if (space
|| (brk
&& quote
)) {
1286 builder
.append(openDoubleQuote
);
1288 builder
.append(openDoubleQuote
);
1301 builder
.append(closeDoubleQuote
);
1307 builder
.append(car
);
1314 if (tentativeCloseQuote
) {
1315 tentativeCloseQuote
= false;
1316 builder
.append(closeQuote
);
1319 line
= builder
.toString().trim();
1321 ParagraphType type
= ParagraphType
.NORMAL
;
1323 type
= ParagraphType
.BLANK
;
1325 type
= ParagraphType
.BREAK
;
1327 type
= ParagraphType
.QUOTE
;
1330 return new Paragraph(type
, line
, words
);
1334 * Remove the HTML from the input <b>if</b> {@link BasicSupport#isHtml()} is
1340 * @return the no html version if needed
1342 private String
ifUnhtml(String input
) {
1343 if (isHtml() && input
!= null) {
1344 return StringUtils
.unhtml(input
);
1351 * Return a {@link BasicSupport} implementation supporting the given
1352 * resource if possible.
1355 * the story resource
1357 * @return an implementation that supports it, or NULL
1359 public static BasicSupport
getSupport(URL url
) {
1364 // TEXT and INFO_TEXT always support files (not URLs though)
1365 for (SupportType type
: SupportType
.values()) {
1366 if (type
!= SupportType
.TEXT
&& type
!= SupportType
.INFO_TEXT
) {
1367 BasicSupport support
= getSupport(type
);
1368 if (support
!= null && support
.supports(url
)) {
1374 for (SupportType type
: new SupportType
[] { SupportType
.INFO_TEXT
,
1375 SupportType
.TEXT
}) {
1376 BasicSupport support
= getSupport(type
);
1377 if (support
!= null && support
.supports(url
)) {
1386 * Return a {@link BasicSupport} implementation supporting the given type.
1391 * @return an implementation that supports it, or NULL
1393 public static BasicSupport
getSupport(SupportType type
) {
1396 return new Epub().setType(type
);
1398 return new InfoText().setType(type
);
1401 // Can fail if no client key or NO in options
1402 return new FimfictionApi().setType(type
);
1403 } catch (IOException e
) {
1404 return new Fimfiction().setType(type
);
1407 return new Fanfiction().setType(type
);
1409 return new Text().setType(type
);
1411 return new MangaFox().setType(type
);
1413 return new E621().setType(type
);
1415 return new YiffStar().setType(type
);
1417 return new EHentai().setType(type
);
1419 return new Cbz().setType(type
);
1421 return new Html().setType(type
);
1428 * Reset the given {@link InputStream} and return it.
1431 * the {@link InputStream} to reset
1433 * @return the same {@link InputStream} after reset
1435 static protected InputStream
reset(InputStream in
) {
1440 } catch (IOException e
) {
1447 * Return the first line from the given input which correspond to the given
1453 * a string that must be found inside the target line (also
1454 * supports "^" at start to say "only if it starts with" the
1456 * @param relativeLine
1457 * the line to return based upon the target line position (-1 =
1458 * the line before, 0 = the target line...)
1462 static protected String
getLine(InputStream in
, String needle
,
1464 return getLine(in
, needle
, relativeLine
, true);
1468 * Return a line from the given input which correspond to the given
1474 * a string that must be found inside the target line (also
1475 * supports "^" at start to say "only if it starts with" the
1477 * @param relativeLine
1478 * the line to return based upon the target line position (-1 =
1479 * the line before, 0 = the target line...)
1481 * takes the first result (as opposed to the last one, which will
1482 * also always spend the input)
1486 static protected String
getLine(InputStream in
, String needle
,
1487 int relativeLine
, boolean first
) {
1492 List
<String
> lines
= new ArrayList
<String
>();
1493 @SuppressWarnings("resource")
1494 Scanner scan
= new Scanner(in
, "UTF-8");
1496 scan
.useDelimiter("\\n");
1497 while (scan
.hasNext()) {
1498 lines
.add(scan
.next());
1501 if (needle
.startsWith("^")) {
1502 if (lines
.get(lines
.size() - 1).startsWith(
1503 needle
.substring(1))) {
1504 index
= lines
.size() - 1;
1508 if (lines
.get(lines
.size() - 1).contains(needle
)) {
1509 index
= lines
.size() - 1;
1514 if (index
>= 0 && index
+ relativeLine
< lines
.size()) {
1515 rep
= lines
.get(index
+ relativeLine
);
1526 * Return the text between the key and the endKey (and optional subKey can
1527 * be passed, in this case we will look for the key first, then take the
1528 * text between the subKey and the endKey).
1530 * Will only match the first line with the given key if more than one are
1531 * possible. Which also means that if the subKey or endKey is not found on
1532 * that line, NULL will be returned.
1537 * the key to match (also supports "^" at start to say
1538 * "only if it starts with" the key)
1540 * the sub key or NULL if none
1542 * the end key or NULL for "up to the end"
1543 * @return the text or NULL if not found
1545 static protected String
getKeyLine(InputStream in
, String key
,
1546 String subKey
, String endKey
) {
1547 return getKeyText(getLine(in
, key
, 0), key
, subKey
, endKey
);
1551 * Return the text between the key and the endKey (and optional subKey can
1552 * be passed, in this case we will look for the key first, then take the
1553 * text between the subKey and the endKey).
1558 * the key to match (also supports "^" at start to say
1559 * "only if it starts with" the key)
1561 * the sub key or NULL if none
1563 * the end key or NULL for "up to the end"
1564 * @return the text or NULL if not found
1566 static protected String
getKeyText(String in
, String key
, String subKey
,
1568 String result
= null;
1571 if (line
!= null && line
.contains(key
)) {
1572 line
= line
.substring(line
.indexOf(key
) + key
.length());
1573 if (subKey
== null || subKey
.isEmpty() || line
.contains(subKey
)) {
1574 if (subKey
!= null) {
1575 line
= line
.substring(line
.indexOf(subKey
)
1578 if (endKey
== null || line
.contains(endKey
)) {
1579 if (endKey
!= null) {
1580 line
= line
.substring(0, line
.indexOf(endKey
));
1591 * Return the text between the key and the endKey (optional subKeys can be
1592 * passed, in this case we will look for the subKeys first, then take the
1593 * text between the key and the endKey).
1600 * the end key or NULL for "up to the end"
1602 * the sub-keys to find before checking for key/endKey
1604 * @return the text or NULL if not found
1606 static protected String
getKeyTextAfter(String in
, String key
,
1607 String endKey
, String
... afters
) {
1609 if (in
!= null && !in
.isEmpty()) {
1610 int pos
= indexOfAfter(in
, 0, afters
);
1615 in
= in
.substring(pos
);
1618 return getKeyText(in
, key
, null, endKey
);
1622 * Return the first index after all the given "afters" have been found in
1623 * the {@link String}, or -1 if it was not possible.
1628 * start at this position in the string
1630 * the sub-keys to find before checking for key/endKey
1632 * @return the index just after the last of the "afters", or -1 if not found
1634 static protected int indexOfAfter(String in
, int startAt
, String
... afters
) {
1636 if (in
!= null && !in
.isEmpty()) {
1638 if (afters
!= null) {
1639 for (int i
= 0; pos
>= 0 && i
< afters
.length
; i
++) {
1640 String subKey
= afters
[i
];
1641 if (!subKey
.isEmpty()) {
1642 pos
= in
.indexOf(subKey
, pos
);
1644 pos
+= subKey
.length();