1 package be
.nikiroo
.fanfix
.supported
;
3 import java
.awt
.image
.BufferedImage
;
4 import java
.io
.BufferedReader
;
5 import java
.io
.ByteArrayInputStream
;
7 import java
.io
.IOException
;
8 import java
.io
.InputStream
;
9 import java
.io
.InputStreamReader
;
10 import java
.net
.MalformedURLException
;
12 import java
.util
.ArrayList
;
13 import java
.util
.Date
;
14 import java
.util
.HashMap
;
15 import java
.util
.List
;
17 import java
.util
.Map
.Entry
;
18 import java
.util
.Scanner
;
20 import be
.nikiroo
.fanfix
.Instance
;
21 import be
.nikiroo
.fanfix
.bundles
.Config
;
22 import be
.nikiroo
.fanfix
.bundles
.StringId
;
23 import be
.nikiroo
.fanfix
.data
.Chapter
;
24 import be
.nikiroo
.fanfix
.data
.MetaData
;
25 import be
.nikiroo
.fanfix
.data
.Paragraph
;
26 import be
.nikiroo
.fanfix
.data
.Paragraph
.ParagraphType
;
27 import be
.nikiroo
.fanfix
.data
.Story
;
28 import be
.nikiroo
.utils
.ImageUtils
;
29 import be
.nikiroo
.utils
.Progress
;
30 import be
.nikiroo
.utils
.StringUtils
;
33 * This class is the base class used by the other support classes. It can be
34 * used outside of this package, and has static methods that you can use to get
35 * access to the correct support class.
37 * It will be used with 'resources' (usually web pages or files).
41 public abstract class BasicSupport
{
43 * The supported input types for which we can get a {@link BasicSupport}
48 public enum SupportType
{
49 /** EPUB files created with this program */
51 /** Pure text file with some rules */
53 /** TEXT but with associated .info file */
55 /** My Little Pony fanfictions */
57 /** Fanfictions from a lot of different universes */
59 /** Website with lots of Mangas */
61 /** Furry website with comics support */
63 /** Furry website with stories */
65 /** Comics and images groups, mostly but not only NSFW */
73 * A description of this support type (more information than the
74 * {@link BasicSupport#getSourceName()}).
76 * @return the description
78 public String
getDesc() {
79 String desc
= Instance
.getTrans().getStringX(StringId
.INPUT_DESC
,
83 desc
= Instance
.getTrans().getString(StringId
.INPUT_DESC
, this);
90 * The name of this support type (a short version).
94 public String
getSourceName() {
95 BasicSupport support
= BasicSupport
.getSupport(this);
96 if (support
!= null) {
97 return support
.getSourceName();
104 public String
toString() {
105 return super.toString().toLowerCase();
109 * Call {@link SupportType#valueOf(String)} after conversion to upper
113 * the possible type name
115 * @return NULL or the type
117 public static SupportType
valueOfUC(String typeName
) {
118 return SupportType
.valueOf(typeName
== null ?
null : typeName
123 * Call {@link SupportType#valueOf(String)} after conversion to upper
124 * case but return NULL for NULL instead of raising exception.
127 * the possible type name
129 * @return NULL or the type
131 public static SupportType
valueOfNullOkUC(String typeName
) {
132 if (typeName
== null) {
136 return SupportType
.valueOfUC(typeName
);
140 * Call {@link SupportType#valueOf(String)} after conversion to upper
141 * case but return NULL in case of error instead of raising an
145 * the possible type name
147 * @return NULL or the type
149 public static SupportType
valueOfAllOkUC(String typeName
) {
151 return SupportType
.valueOfUC(typeName
);
152 } catch (Exception e
) {
158 private InputStream in
;
159 private SupportType type
;
160 private URL currentReferer
; // with only one 'r', as in 'HTTP'...
163 private char openQuote
= Instance
.getTrans().getCharacter(
164 StringId
.OPEN_SINGLE_QUOTE
);
165 private char closeQuote
= Instance
.getTrans().getCharacter(
166 StringId
.CLOSE_SINGLE_QUOTE
);
167 private char openDoubleQuote
= Instance
.getTrans().getCharacter(
168 StringId
.OPEN_DOUBLE_QUOTE
);
169 private char closeDoubleQuote
= Instance
.getTrans().getCharacter(
170 StringId
.CLOSE_DOUBLE_QUOTE
);
173 * The name of this support class.
177 protected abstract String
getSourceName();
180 * Check if the given resource is supported by this {@link BasicSupport}.
183 * the resource to check for
185 * @return TRUE if it is
187 protected abstract boolean supports(URL url
);
190 * Return TRUE if the support will return HTML encoded content values for
191 * the chapters content.
193 * @return TRUE for HTML
195 protected abstract boolean isHtml();
198 * Return the {@link MetaData} of this story.
201 * the source of the story
203 * the input (the main resource)
205 * @return the associated {@link MetaData}, never NULL
207 * @throws IOException
208 * in case of I/O error
210 protected abstract MetaData
getMeta(URL source
, InputStream in
)
214 * Return the story description.
217 * the source of the story
219 * the input (the main resource)
221 * @return the description
223 * @throws IOException
224 * in case of I/O error
226 protected abstract String
getDesc(URL source
, InputStream in
)
230 * Return the list of chapters (name and resource).
233 * the source of the story
235 * the input (the main resource)
237 * the optional progress reporter
239 * @return the chapters
241 * @throws IOException
242 * in case of I/O error
244 protected abstract List
<Entry
<String
, URL
>> getChapters(URL source
,
245 InputStream in
, Progress pg
) throws IOException
;
248 * Return the content of the chapter (possibly HTML encoded, if
249 * {@link BasicSupport#isHtml()} is TRUE).
252 * the source of the story
254 * the input (the main resource)
258 * the optional progress reporter
260 * @return the content
262 * @throws IOException
263 * in case of I/O error
265 protected abstract String
getChapterContent(URL source
, InputStream in
,
266 int number
, Progress pg
) throws IOException
;
269 * Log into the support (can be a no-op depending upon the support).
271 * @throws IOException
272 * in case of I/O error
274 @SuppressWarnings("unused")
275 public void login() throws IOException
{
279 * Return the list of cookies (values included) that must be used to
280 * correctly fetch the resources.
282 * You are expected to call the super method implementation if you override
285 * @return the cookies
287 public Map
<String
, String
> getCookies() {
288 return new HashMap
<String
, String
>();
292 * OAuth authorisation (aka, "bearer XXXXXXX").
294 * @return the OAuth string
296 public String
getOAuth() {
301 * Return the canonical form of the main {@link URL}.
304 * the source {@link URL}
306 * @return the canonical form of this {@link URL}
308 * @throws IOException
309 * in case of I/O error
311 @SuppressWarnings("unused")
312 public URL
getCanonicalUrl(URL source
) throws IOException
{
317 * Process the given story resource into a partially filled {@link Story}
318 * object containing the name and metadata, except for the description.
323 * @return the {@link Story}
325 * @throws IOException
326 * in case of I/O error
328 public Story
processMeta(URL url
) throws IOException
{
329 return processMeta(url
, true, false, null);
333 * Process the given story resource into a partially filled {@link Story}
334 * object containing the name and metadata.
339 * close "this" and "in" when done
341 * retrieve the description of the story, or not
343 * the optional progress reporter
345 * @return the {@link Story}, never NULL
347 * @throws IOException
348 * in case of I/O error
350 protected Story
processMeta(URL url
, boolean close
, boolean getDesc
,
351 Progress pg
) throws IOException
{
355 pg
.setMinMax(0, 100);
361 url
= getCanonicalUrl(url
);
363 setCurrentReferer(url
);
365 in
= openInput(url
); // NULL allowed here
367 preprocess(url
, getInput());
370 Story story
= new Story();
371 MetaData meta
= getMeta(url
, getInput());
372 if (meta
.getCreationDate() == null
373 || meta
.getCreationDate().isEmpty()) {
374 meta
.setCreationDate(StringUtils
.fromTime(new Date().getTime()));
380 if (meta
.getCover() == null) {
381 meta
.setCover(getDefaultCover(meta
.getSubject()));
387 String descChapterName
= Instance
.getTrans().getString(
388 StringId
.DESCRIPTION
);
389 story
.getMeta().setResume(
390 makeChapter(url
, 0, descChapterName
,
391 getDesc(url
, getInput()), null));
400 } catch (IOException e
) {
401 Instance
.getTraceHandler().error(e
);
409 setCurrentReferer(null);
414 * Process the given story resource into a fully filled {@link Story}
420 * the optional progress reporter
422 * @return the {@link Story}, never NULL
424 * @throws IOException
425 * in case of I/O error
427 public Story
process(URL url
, Progress pg
) throws IOException
{
431 pg
.setMinMax(0, 100);
434 url
= getCanonicalUrl(url
);
437 Progress pgMeta
= new Progress();
438 pg
.addProgress(pgMeta
, 10);
439 Story story
= processMeta(url
, false, true, pgMeta
);
440 if (!pgMeta
.isDone()) {
441 pgMeta
.setProgress(pgMeta
.getMax()); // 10%
444 pg
.setName("Retrieving " + story
.getMeta().getTitle());
446 setCurrentReferer(url
);
448 Progress pgGetChapters
= new Progress();
449 pg
.addProgress(pgGetChapters
, 10);
450 story
.setChapters(new ArrayList
<Chapter
>());
451 List
<Entry
<String
, URL
>> chapters
= getChapters(url
, getInput(),
453 if (!pgGetChapters
.isDone()) {
454 pgGetChapters
.setProgress(pgGetChapters
.getMax()); // 20%
457 if (chapters
!= null) {
458 Progress pgChaps
= new Progress("Extracting chapters", 0,
459 chapters
.size() * 300);
460 pg
.addProgress(pgChaps
, 80);
464 for (Entry
<String
, URL
> chap
: chapters
) {
465 pgChaps
.setName("Extracting chapter " + i
);
466 InputStream chapIn
= null;
467 if (chap
.getValue() != null) {
468 setCurrentReferer(chap
.getValue());
469 chapIn
= Instance
.getCache().open(chap
.getValue(),
472 pgChaps
.setProgress(i
* 100);
474 Progress pgGetChapterContent
= new Progress();
475 Progress pgMakeChapter
= new Progress();
476 pgChaps
.addProgress(pgGetChapterContent
, 100);
477 pgChaps
.addProgress(pgMakeChapter
, 100);
479 String content
= getChapterContent(url
, chapIn
, i
,
480 pgGetChapterContent
);
481 if (!pgGetChapterContent
.isDone()) {
482 pgGetChapterContent
.setProgress(pgGetChapterContent
486 Chapter cc
= makeChapter(url
, i
, chap
.getKey(),
487 content
, pgMakeChapter
);
488 if (!pgMakeChapter
.isDone()) {
489 pgMakeChapter
.setProgress(pgMakeChapter
.getMax());
492 words
+= cc
.getWords();
493 story
.getChapters().add(cc
);
494 story
.getMeta().setWords(words
);
496 if (chapIn
!= null) {
504 pgChaps
.setName("Extracting chapters");
514 } catch (IOException e
) {
515 Instance
.getTraceHandler().error(e
);
522 setCurrentReferer(null);
531 public SupportType
getType() {
536 * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
537 * the current {@link URL} we work on.
539 * @return the referer
541 public URL
getCurrentReferer() {
542 return currentReferer
;
546 * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
547 * the current {@link URL} we work on.
549 * @param currentReferer
552 protected void setCurrentReferer(URL currentReferer
) {
553 this.currentReferer
= currentReferer
;
564 protected BasicSupport
setType(SupportType type
) {
570 * Prepare the support if needed before processing.
573 * the source of the story
575 * the input (the main resource)
577 * @throws IOException
580 @SuppressWarnings("unused")
581 protected void preprocess(URL source
, InputStream in
) throws IOException
{
585 * Now that we have processed the {@link Story}, close the resources if any.
587 * @throws IOException
590 @SuppressWarnings("unused")
591 protected void close() throws IOException
{
595 * Create a {@link Chapter} object from the given information, formatting
596 * the content as it should be.
599 * the source of the story
605 * the chapter content
607 * the optional progress reporter
609 * @return the {@link Chapter}
611 * @throws IOException
612 * in case of I/O error
614 protected Chapter
makeChapter(URL source
, int number
, String name
,
615 String content
, Progress pg
) throws IOException
{
616 // Chapter name: process it correctly, then remove the possible
617 // redundant "Chapter x: " in front of it, or "-" (as in
618 // "Chapter 5: - Fun!" after the ": " was automatically added)
619 String chapterName
= processPara(name
).getContent().trim();
620 for (String lang
: Instance
.getConfig().getString(Config
.CHAPTER
)
622 String chapterWord
= Instance
.getConfig().getStringX(
623 Config
.CHAPTER
, lang
);
624 if (chapterName
.startsWith(chapterWord
)) {
625 chapterName
= chapterName
.substring(chapterWord
.length())
631 if (chapterName
.startsWith(Integer
.toString(number
))) {
632 chapterName
= chapterName
.substring(
633 Integer
.toString(number
).length()).trim();
636 while (chapterName
.startsWith(":") || chapterName
.startsWith("-")) {
637 chapterName
= chapterName
.substring(1).trim();
641 Chapter chap
= new Chapter(number
, chapterName
);
643 if (content
!= null) {
644 List
<Paragraph
> paras
= makeParagraphs(source
, content
, pg
);
646 for (Paragraph para
: paras
) {
647 words
+= para
.getWords();
649 chap
.setParagraphs(paras
);
650 chap
.setWords(words
);
658 * Convert the given content into {@link Paragraph}s.
661 * the source URL of the story
663 * the textual content
665 * the optional progress reporter
667 * @return the {@link Paragraph}s
669 * @throws IOException
670 * in case of I/O error
672 protected List
<Paragraph
> makeParagraphs(URL source
, String content
,
673 Progress pg
) throws IOException
{
679 // Special <HR> processing:
680 content
= content
.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
684 List
<Paragraph
> paras
= new ArrayList
<Paragraph
>();
686 if (content
!= null && !content
.trim().isEmpty()) {
688 String
[] tab
= content
.split("(<p>|</p>|<br>|<br/>)");
689 pg
.setMinMax(0, tab
.length
);
691 for (String line
: tab
) {
692 if (line
.startsWith("[") && line
.endsWith("]")) {
693 pg
.setName("Extracting image " + i
);
695 paras
.add(makeParagraph(source
, line
.trim()));
700 List
<String
> lines
= new ArrayList
<String
>();
701 BufferedReader buff
= null;
703 buff
= new BufferedReader(
704 new InputStreamReader(new ByteArrayInputStream(
705 content
.getBytes("UTF-8")), "UTF-8"));
706 for (String line
= buff
.readLine(); line
!= null; line
= buff
708 lines
.add(line
.trim());
716 pg
.setMinMax(0, lines
.size());
718 for (String line
: lines
) {
719 if (line
.startsWith("[") && line
.endsWith("]")) {
720 pg
.setName("Extracting image " + i
);
722 paras
.add(makeParagraph(source
, line
));
728 // Check quotes for "bad" format
729 List
<Paragraph
> newParas
= new ArrayList
<Paragraph
>();
730 for (Paragraph para
: paras
) {
731 newParas
.addAll(requotify(para
));
735 // Remove double blanks/brks
736 fixBlanksBreaks(paras
);
743 * Convert the given line into a single {@link Paragraph}.
746 * the source URL of the story
748 * the textual content of the paragraph
750 * @return the {@link Paragraph}
752 private Paragraph
makeParagraph(URL source
, String line
) {
753 BufferedImage image
= null;
754 if (line
.startsWith("[") && line
.endsWith("]")) {
755 image
= getImage(this, source
, line
.substring(1, line
.length() - 1)
760 return new Paragraph(image
);
763 return processPara(line
);
767 * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
768 * those {@link Paragraph}s.
770 * The resulting list will not contain a starting or trailing blank/break
771 * nor 2 blanks or breaks following each other.
774 * the list of {@link Paragraph}s to fix
776 protected void fixBlanksBreaks(List
<Paragraph
> paras
) {
777 boolean space
= false;
779 for (int i
= 0; i
< paras
.size(); i
++) {
780 Paragraph para
= paras
.get(i
);
781 boolean thisSpace
= para
.getType() == ParagraphType
.BLANK
;
782 boolean thisBrk
= para
.getType() == ParagraphType
.BREAK
;
784 if (i
> 0 && space
&& thisBrk
) {
787 } else if ((space
|| brk
) && (thisSpace
|| thisBrk
)) {
796 // Remove blank/brk at start
798 && (paras
.get(0).getType() == ParagraphType
.BLANK
|| paras
.get(
799 0).getType() == ParagraphType
.BREAK
)) {
803 // Remove blank/brk at end
804 int last
= paras
.size() - 1;
806 && (paras
.get(last
).getType() == ParagraphType
.BLANK
|| paras
807 .get(last
).getType() == ParagraphType
.BREAK
)) {
813 * Get the default cover related to this subject (see <tt>.info</tt> files).
818 * @return the cover if any, or NULL
820 static BufferedImage
getDefaultCover(String subject
) {
821 if (subject
!= null && !subject
.isEmpty()
822 && Instance
.getCoverDir() != null) {
824 File fileCover
= new File(Instance
.getCoverDir(), subject
);
825 return getImage(null, fileCover
.toURI().toURL(), subject
);
826 } catch (MalformedURLException e
) {
834 * Return the list of supported image extensions.
836 * @param emptyAllowed
837 * TRUE to allow an empty extension on first place, which can be
838 * used when you may already have an extension in your input but
839 * are not sure about it
841 * @return the extensions
843 static String
[] getImageExt(boolean emptyAllowed
) {
845 return new String
[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
848 return new String
[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
852 * Check if the given resource can be a local image or a remote image, then
853 * refresh the cache with it if it is.
858 * the resource to check
860 * @return the image if found, or NULL
863 static BufferedImage
getImage(BasicSupport support
, URL source
, String line
) {
864 URL url
= getImageUrl(support
, source
, line
);
866 InputStream in
= null;
868 in
= Instance
.getCache().open(url
, getSupport(url
), true);
869 return ImageUtils
.fromStream(in
);
870 } catch (IOException e
) {
875 } catch (IOException e
) {
885 * Check if the given resource can be a local image or a remote image, then
886 * refresh the cache with it if it is.
891 * the resource to check
893 * @return the image URL if found, or NULL
896 static URL
getImageUrl(BasicSupport support
, URL source
, String line
) {
901 if (source
!= null) {
904 String relPath
= null;
905 String absPath
= null;
907 String path
= new File(source
.getFile()).getParent();
908 relPath
= new File(new File(path
), line
.trim())
910 } catch (Exception e
) {
911 // Cannot be converted to path (one possibility to take
912 // into account: absolute path on Windows)
915 absPath
= new File(line
.trim()).getAbsolutePath();
916 } catch (Exception e
) {
917 // Cannot be converted to path (at all)
920 for (String ext
: getImageExt(true)) {
921 if (absPath
!= null && new File(absPath
+ ext
).exists()) {
922 url
= new File(absPath
+ ext
).toURI().toURL();
923 } else if (relPath
!= null
924 && new File(relPath
+ ext
).exists()) {
925 url
= new File(relPath
+ ext
).toURI().toURL();
928 } catch (Exception e
) {
929 // Should not happen since we control the correct arguments
936 for (String ext
: getImageExt(true)) {
937 if (Instance
.getCache()
938 .check(new URL(line
+ ext
), true)) {
939 url
= new URL(line
+ ext
);
946 for (String ext
: getImageExt(true)) {
948 url
= new URL(line
+ ext
);
949 Instance
.getCache().refresh(url
, support
, true);
951 } catch (IOException e
) {
952 // no image with this ext
957 } catch (MalformedURLException e
) {
962 // refresh the cached file
965 Instance
.getCache().refresh(url
, support
, true);
966 } catch (IOException e
) {
967 // woops, broken image
977 * Open the input file that will be used through the support.
979 * Can return NULL, in which case you are supposed to work without an
980 * {@link InputStream}.
983 * the source {@link URL}
985 * @return the {@link InputStream}
987 * @throws IOException
988 * in case of I/O error
990 protected InputStream
openInput(URL source
) throws IOException
{
991 return Instance
.getCache().open(source
, this, false);
995 * Reset then return {@link BasicSupport#in}.
997 * @return {@link BasicSupport#in}
999 protected InputStream
getInput() {
1004 * Fix the author name if it is prefixed with some "by" {@link String}.
1007 * the author with a possible prefix
1009 * @return the author without prefixes
1011 protected String
fixAuthor(String author
) {
1012 if (author
!= null) {
1013 for (String suffix
: new String
[] { " ", ":" }) {
1014 for (String byString
: Instance
.getConfig()
1015 .getString(Config
.BYS
).split(",")) {
1017 if (author
.toUpperCase().startsWith(byString
.toUpperCase())) {
1018 author
= author
.substring(byString
.length()).trim();
1023 // Special case (without suffix):
1024 if (author
.startsWith("©")) {
1025 author
= author
.substring(1);
1033 * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
1034 * and requotify them (i.e., separate them into QUOTE paragraphs and other
1035 * paragraphs (quotes or not)).
1038 * the paragraph to requotify (not necessarily a quote)
1040 * @return the correctly (or so we hope) quotified paragraphs
1042 protected List
<Paragraph
> requotify(Paragraph para
) {
1043 List
<Paragraph
> newParas
= new ArrayList
<Paragraph
>();
1045 if (para
.getType() == ParagraphType
.QUOTE
1046 && para
.getContent().length() > 2) {
1047 String line
= para
.getContent();
1048 boolean singleQ
= line
.startsWith("" + openQuote
);
1049 boolean doubleQ
= line
.startsWith("" + openDoubleQuote
);
1051 // Do not try when more than one quote at a time
1052 // (some stories are not easily readable if we do)
1054 && line
.indexOf(closeQuote
, 1) < line
1055 .lastIndexOf(closeQuote
)) {
1060 && line
.indexOf(closeDoubleQuote
, 1) < line
1061 .lastIndexOf(closeDoubleQuote
)) {
1067 if (!singleQ
&& !doubleQ
) {
1068 line
= openDoubleQuote
+ line
+ closeDoubleQuote
;
1069 newParas
.add(new Paragraph(ParagraphType
.QUOTE
, line
, para
1072 char open
= singleQ ? openQuote
: openDoubleQuote
;
1073 char close
= singleQ ? closeQuote
: closeDoubleQuote
;
1076 boolean inQuote
= false;
1078 for (char car
: line
.toCharArray()) {
1081 } else if (car
== close
) {
1083 } else if (car
== '.' && !inQuote
) {
1091 String rest
= line
.substring(posDot
+ 1).trim();
1092 line
= line
.substring(0, posDot
+ 1).trim();
1094 for (char car
: line
.toCharArray()) {
1099 newParas
.add(new Paragraph(ParagraphType
.QUOTE
, line
, words
));
1100 if (!rest
.isEmpty()) {
1101 newParas
.addAll(requotify(processPara(rest
)));
1115 * Process a {@link Paragraph} from a raw line of text.
1117 * Will also fix quotes and HTML encoding if needed.
1122 * @return the processed {@link Paragraph}
1124 protected Paragraph
processPara(String line
) {
1125 line
= ifUnhtml(line
).trim();
1127 boolean space
= true;
1129 boolean quote
= false;
1130 boolean tentativeCloseQuote
= false;
1135 StringBuilder builder
= new StringBuilder();
1136 for (char car
: line
.toCharArray()) {
1138 if (dashCount
> 0) {
1139 // dash, ndash and mdash: - – —
1140 // currently: always use mdash
1141 builder
.append(dashCount
== 1 ?
'-' : '—');
1146 if (tentativeCloseQuote
) {
1147 tentativeCloseQuote
= false;
1148 if (Character
.isLetterOrDigit(car
)) {
1149 builder
.append("'");
1151 // handle double-single quotes as double quotes
1153 builder
.append(closeDoubleQuote
);
1157 builder
.append(closeQuote
);
1162 case ' ': // note: unbreakable space
1165 case '\n': // just in case
1166 case '\r': // just in case
1167 if (builder
.length() > 0
1168 && builder
.charAt(builder
.length() - 1) != ' ') {
1171 builder
.append(' ');
1175 if (space
|| (brk
&& quote
)) {
1177 // handle double-single quotes as double quotes
1179 builder
.deleteCharAt(builder
.length() - 1);
1180 builder
.append(openDoubleQuote
);
1182 builder
.append(openQuote
);
1184 } else if (prev
== ' ' || prev
== car
) {
1185 // handle double-single quotes as double quotes
1187 builder
.deleteCharAt(builder
.length() - 1);
1188 builder
.append(openDoubleQuote
);
1190 builder
.append(openQuote
);
1193 // it is a quote ("I'm off") or a 'quote' ("This
1194 // 'good' restaurant"...)
1195 tentativeCloseQuote
= true;
1200 if (space
|| (brk
&& quote
)) {
1202 builder
.append(openDoubleQuote
);
1203 } else if (prev
== ' ') {
1204 builder
.append(openDoubleQuote
);
1206 builder
.append(closeDoubleQuote
);
1231 builder
.append(car
);
1240 if (space
|| (brk
&& quote
)) {
1242 builder
.append(openQuote
);
1244 // handle double-single quotes as double quotes
1246 builder
.deleteCharAt(builder
.length() - 1);
1247 builder
.append(openDoubleQuote
);
1249 builder
.append(openQuote
);
1263 // handle double-single quotes as double quotes
1265 builder
.deleteCharAt(builder
.length() - 1);
1266 builder
.append(closeDoubleQuote
);
1268 builder
.append(closeQuote
);
1277 if (space
|| (brk
&& quote
)) {
1279 builder
.append(openDoubleQuote
);
1281 builder
.append(openDoubleQuote
);
1294 builder
.append(closeDoubleQuote
);
1300 builder
.append(car
);
1307 if (tentativeCloseQuote
) {
1308 tentativeCloseQuote
= false;
1309 builder
.append(closeQuote
);
1312 line
= builder
.toString().trim();
1314 ParagraphType type
= ParagraphType
.NORMAL
;
1316 type
= ParagraphType
.BLANK
;
1318 type
= ParagraphType
.BREAK
;
1320 type
= ParagraphType
.QUOTE
;
1323 return new Paragraph(type
, line
, words
);
1327 * Remove the HTML from the input <b>if</b> {@link BasicSupport#isHtml()} is
1333 * @return the no html version if needed
1335 private String
ifUnhtml(String input
) {
1336 if (isHtml() && input
!= null) {
1337 return StringUtils
.unhtml(input
);
1344 * Return a {@link BasicSupport} implementation supporting the given
1345 * resource if possible.
1348 * the story resource
1350 * @return an implementation that supports it, or NULL
1352 public static BasicSupport
getSupport(URL url
) {
1357 // TEXT and INFO_TEXT always support files (not URLs though)
1358 for (SupportType type
: SupportType
.values()) {
1359 if (type
!= SupportType
.TEXT
&& type
!= SupportType
.INFO_TEXT
) {
1360 BasicSupport support
= getSupport(type
);
1361 if (support
!= null && support
.supports(url
)) {
1367 for (SupportType type
: new SupportType
[] { SupportType
.INFO_TEXT
,
1368 SupportType
.TEXT
}) {
1369 BasicSupport support
= getSupport(type
);
1370 if (support
!= null && support
.supports(url
)) {
1379 * Return a {@link BasicSupport} implementation supporting the given type.
1384 * @return an implementation that supports it, or NULL
1386 public static BasicSupport
getSupport(SupportType type
) {
1389 return new Epub().setType(type
);
1391 return new InfoText().setType(type
);
1394 // Can fail if no client key or NO in options
1395 return new FimfictionApi().setType(type
);
1396 } catch (IOException e
) {
1397 return new Fimfiction().setType(type
);
1400 return new Fanfiction().setType(type
);
1402 return new Text().setType(type
);
1404 return new MangaFox().setType(type
);
1406 return new E621().setType(type
);
1408 return new YiffStar().setType(type
);
1410 return new EHentai().setType(type
);
1412 return new Cbz().setType(type
);
1414 return new Html().setType(type
);
1421 * Reset the given {@link InputStream} and return it.
1424 * the {@link InputStream} to reset
1426 * @return the same {@link InputStream} after reset
1428 static protected InputStream
reset(InputStream in
) {
1433 } catch (IOException e
) {
1440 * Return the first line from the given input which correspond to the given
1446 * a string that must be found inside the target line (also
1447 * supports "^" at start to say "only if it starts with" the
1449 * @param relativeLine
1450 * the line to return based upon the target line position (-1 =
1451 * the line before, 0 = the target line...)
1455 static protected String
getLine(InputStream in
, String needle
,
1457 return getLine(in
, needle
, relativeLine
, true);
1461 * Return a line from the given input which correspond to the given
1467 * a string that must be found inside the target line (also
1468 * supports "^" at start to say "only if it starts with" the
1470 * @param relativeLine
1471 * the line to return based upon the target line position (-1 =
1472 * the line before, 0 = the target line...)
1474 * takes the first result (as opposed to the last one, which will
1475 * also always spend the input)
1479 static protected String
getLine(InputStream in
, String needle
,
1480 int relativeLine
, boolean first
) {
1485 List
<String
> lines
= new ArrayList
<String
>();
1486 @SuppressWarnings("resource")
1487 Scanner scan
= new Scanner(in
, "UTF-8");
1489 scan
.useDelimiter("\\n");
1490 while (scan
.hasNext()) {
1491 lines
.add(scan
.next());
1494 if (needle
.startsWith("^")) {
1495 if (lines
.get(lines
.size() - 1).startsWith(
1496 needle
.substring(1))) {
1497 index
= lines
.size() - 1;
1501 if (lines
.get(lines
.size() - 1).contains(needle
)) {
1502 index
= lines
.size() - 1;
1507 if (index
>= 0 && index
+ relativeLine
< lines
.size()) {
1508 rep
= lines
.get(index
+ relativeLine
);
1519 * Return the text between the key and the endKey (and optional subKey can
1520 * be passed, in this case we will look for the key first, then take the
1521 * text between the subKey and the endKey).
1523 * Will only match the first line with the given key if more than one are
1524 * possible. Which also means that if the subKey or endKey is not found on
1525 * that line, NULL will be returned.
1530 * the key to match (also supports "^" at start to say
1531 * "only if it starts with" the key)
1533 * the sub key or NULL if none
1535 * the end key or NULL for "up to the end"
1536 * @return the text or NULL if not found
1538 static protected String
getKeyLine(InputStream in
, String key
,
1539 String subKey
, String endKey
) {
1540 return getKeyText(getLine(in
, key
, 0), key
, subKey
, endKey
);
1544 * Return the text between the key and the endKey (and optional subKey can
1545 * be passed, in this case we will look for the key first, then take the
1546 * text between the subKey and the endKey).
1551 * the key to match (also supports "^" at start to say
1552 * "only if it starts with" the key)
1554 * the sub key or NULL if none
1556 * the end key or NULL for "up to the end"
1557 * @return the text or NULL if not found
1559 static protected String
getKeyText(String in
, String key
, String subKey
,
1561 String result
= null;
1564 if (line
!= null && line
.contains(key
)) {
1565 line
= line
.substring(line
.indexOf(key
) + key
.length());
1566 if (subKey
== null || subKey
.isEmpty() || line
.contains(subKey
)) {
1567 if (subKey
!= null) {
1568 line
= line
.substring(line
.indexOf(subKey
)
1571 if (endKey
== null || line
.contains(endKey
)) {
1572 if (endKey
!= null) {
1573 line
= line
.substring(0, line
.indexOf(endKey
));
1584 * Return the text between the key and the endKey (optional subKeys can be
1585 * passed, in this case we will look for the subKeys first, then take the
1586 * text between the key and the endKey).
1593 * the end key or NULL for "up to the end"
1595 * the sub-keys to find before checking for key/endKey
1597 * @return the text or NULL if not found
1599 static protected String
getKeyTextAfter(String in
, String key
,
1600 String endKey
, String
... afters
) {
1602 if (in
!= null && !in
.isEmpty()) {
1603 int pos
= indexOfAfter(in
, 0, afters
);
1608 in
= in
.substring(pos
);
1611 return getKeyText(in
, key
, null, endKey
);
1615 * Return the first index after all the given "afters" have been found in
1616 * the {@link String}, or -1 if it was not possible.
1621 * start at this position in the string
1623 * the sub-keys to find before checking for key/endKey
1625 * @return the index just after the last sub-key found, or -1 if not found
1627 static protected int indexOfAfter(String in
, int startAt
, String
... afters
) {
1629 if (in
!= null && !in
.isEmpty()) {
1631 if (afters
!= null) {
1632 for (int i
= 0; pos
>= 0 && i
< afters
.length
; i
++) {
1633 String subKey
= afters
[i
];
1634 if (!subKey
.isEmpty()) {
1635 pos
= in
.indexOf(subKey
, pos
);
1637 pos
+= subKey
.length();