1 package be
.nikiroo
.fanfix
.supported
;
3 import java
.awt
.image
.BufferedImage
;
4 import java
.io
.BufferedReader
;
5 import java
.io
.ByteArrayInputStream
;
7 import java
.io
.IOException
;
8 import java
.io
.InputStream
;
9 import java
.io
.InputStreamReader
;
10 import java
.net
.MalformedURLException
;
12 import java
.util
.ArrayList
;
13 import java
.util
.Date
;
14 import java
.util
.HashMap
;
15 import java
.util
.List
;
17 import java
.util
.Map
.Entry
;
18 import java
.util
.Scanner
;
20 import be
.nikiroo
.fanfix
.Instance
;
21 import be
.nikiroo
.fanfix
.bundles
.Config
;
22 import be
.nikiroo
.fanfix
.bundles
.StringId
;
23 import be
.nikiroo
.fanfix
.data
.Chapter
;
24 import be
.nikiroo
.fanfix
.data
.MetaData
;
25 import be
.nikiroo
.fanfix
.data
.Paragraph
;
26 import be
.nikiroo
.fanfix
.data
.Paragraph
.ParagraphType
;
27 import be
.nikiroo
.fanfix
.data
.Story
;
28 import be
.nikiroo
.utils
.IOUtils
;
29 import be
.nikiroo
.utils
.Progress
;
30 import be
.nikiroo
.utils
.StringUtils
;
33 * This class is the base class used by the other support classes. It can be
34 * used outside of this package, and has static methods that you can use to get
35 * access to the correct support class.
37 * It will be used with 'resources' (usually web pages or files).
41 public abstract class BasicSupport
{
43 * The supported input types for which we can get a {@link BasicSupport}
48 public enum SupportType
{
49 /** EPUB files created with this program */
51 /** Pure text file with some rules */
53 /** TEXT but with associated .info file */
55 /** My Little Pony fanfictions */
57 /** Fanfictions from a lot of different universes */
59 /** Website with lots of Mangas */
61 /** Furry website with comics support */
63 /** Furry website with stories */
65 /** Comics and images groups, mostly but not only NSFW */
73 * A description of this support type (more information than the
74 * {@link BasicSupport#getSourceName()}).
76 * @return the description
78 public String
getDesc() {
79 String desc
= Instance
.getTrans().getStringX(StringId
.INPUT_DESC
,
83 desc
= Instance
.getTrans().getString(StringId
.INPUT_DESC
, this);
90 * The name of this support type (a short version).
94 public String
getSourceName() {
95 BasicSupport support
= BasicSupport
.getSupport(this);
96 if (support
!= null) {
97 return support
.getSourceName();
104 public String
toString() {
105 return super.toString().toLowerCase();
109 * Call {@link SupportType#valueOf(String)} with the upper-cased type name.
112 * the possible type name
114 * @return NULL or the type
116 public static SupportType
valueOfUC(String typeName
) {
117 return SupportType
.valueOf(typeName
== null ?
null : typeName
122 * Call {@link SupportType#valueOf(String)} with the upper-cased type name, but return
123 * NULL for NULL instead of raising an exception.
126 * the possible type name
128 * @return NULL or the type
130 public static SupportType
valueOfNullOkUC(String typeName
) {
131 if (typeName
== null) {
135 return SupportType
.valueOfUC(typeName
);
139 * Call {@link SupportType#valueOf(String)} with the upper-cased type name, but return
140 * NULL in case of error instead of raising an exception.
143 * the possible type name
145 * @return NULL or the type
147 public static SupportType
valueOfAllOkUC(String typeName
) {
149 return SupportType
.valueOfUC(typeName
);
150 } catch (Exception e
) {
156 private InputStream in
;
157 private SupportType type
;
158 private URL currentReferer
; // with only one 'r', as in 'HTTP'...
161 private char openQuote
= Instance
.getTrans().getCharacter(
162 StringId
.OPEN_SINGLE_QUOTE
);
163 private char closeQuote
= Instance
.getTrans().getCharacter(
164 StringId
.CLOSE_SINGLE_QUOTE
);
165 private char openDoubleQuote
= Instance
.getTrans().getCharacter(
166 StringId
.OPEN_DOUBLE_QUOTE
);
167 private char closeDoubleQuote
= Instance
.getTrans().getCharacter(
168 StringId
.CLOSE_DOUBLE_QUOTE
);
171 * The name of this support class.
175 protected abstract String
getSourceName();
178 * Check if the given resource is supported by this {@link BasicSupport}.
181 * the resource to check for
183 * @return TRUE if it is
185 protected abstract boolean supports(URL url
);
188 * Return TRUE if the support will return HTML encoded content values for
189 * the chapters content.
191 * @return TRUE for HTML
193 protected abstract boolean isHtml();
195 protected abstract MetaData
getMeta(URL source
, InputStream in
)
199 * Return the story description.
202 * the source of the story
204 * the input (the main resource)
206 * @return the description
208 * @throws IOException
209 * in case of I/O error
211 protected abstract String
getDesc(URL source
, InputStream in
)
215 * Return the list of chapters (name and resource).
218 * the source of the story
220 * the input (the main resource)
222 * the optional progress reporter
224 * @return the chapters
226 * @throws IOException
227 * in case of I/O error
229 protected abstract List
<Entry
<String
, URL
>> getChapters(URL source
,
230 InputStream in
, Progress pg
) throws IOException
;
233 * Return the content of the chapter (possibly HTML encoded, if
234 * {@link BasicSupport#isHtml()} is TRUE).
237 * the source of the story
239 * the input (the main resource)
243 * the optional progress reporter
245 * @return the content
247 * @throws IOException
248 * in case of I/O error
250 protected abstract String
getChapterContent(URL source
, InputStream in
,
251 int number
, Progress pg
) throws IOException
;
254 * Log into the support (can be a no-op depending upon the support).
256 * @throws IOException
257 * in case of I/O error
259 public void login() throws IOException
{
264 * Return the list of cookies (values included) that must be used to
265 * correctly fetch the resources.
267 * You are expected to call the super method implementation if you override
270 * @return the cookies
272 * @throws IOException
273 * in case of I/O error
275 public Map
<String
, String
> getCookies() throws IOException
{
276 return new HashMap
<String
, String
>();
280 * Return the canonical form of the main {@link URL}.
283 * the source {@link URL}
285 * @return the canonical form of this {@link URL}
287 * @throws IOException
288 * in case of I/O error
290 public URL
getCanonicalUrl(URL source
) throws IOException
{
295 * Process the given story resource into a partially filled {@link Story}
296 * object containing the name and metadata, except for the description.
301 * @return the {@link Story}
303 * @throws IOException
304 * in case of I/O error
306 public Story
processMeta(URL url
) throws IOException
{
307 return processMeta(url
, true, false, null);
311 * Process the given story resource into a partially filled {@link Story}
312 * object containing the name and metadata.
318 * close "this" and "in" when done
320 * the optional progress reporter
322 * @return the {@link Story}
324 * @throws IOException
325 * in case of I/O error
327 protected Story
processMeta(URL url
, boolean close
, boolean getDesc
,
328 Progress pg
) throws IOException
{
332 pg
.setMinMax(0, 100);
338 url
= getCanonicalUrl(url
);
340 setCurrentReferer(url
);
348 preprocess(url
, getInput());
351 Story story
= new Story();
352 MetaData meta
= getMeta(url
, getInput());
353 if (meta
.getCreationDate() == null
354 || meta
.getCreationDate().isEmpty()) {
355 meta
.setCreationDate(StringUtils
.fromTime(new Date().getTime()));
361 if (meta
!= null && meta
.getCover() == null) {
362 meta
.setCover(getDefaultCover(meta
.getSubject()));
368 String descChapterName
= Instance
.getTrans().getString(
369 StringId
.DESCRIPTION
);
370 story
.getMeta().setResume(
371 makeChapter(url
, 0, descChapterName
,
372 getDesc(url
, getInput()), null));
381 } catch (IOException e
) {
390 setCurrentReferer(null);
395 * Process the given story resource into a fully filled {@link Story}
401 * the optional progress reporter
403 * @return the {@link Story}
405 * @throws IOException
406 * in case of I/O error
408 public Story
process(URL url
, Progress pg
) throws IOException
{
412 pg
.setMinMax(0, 100);
415 url
= getCanonicalUrl(url
);
418 Progress pgMeta
= new Progress();
419 pg
.addProgress(pgMeta
, 10);
420 Story story
= processMeta(url
, false, true, pgMeta
);
421 if (!pgMeta
.isDone()) {
422 pgMeta
.setProgress(pgMeta
.getMax()); // 10%
430 pg
.setName("Retrieving " + story
.getMeta().getTitle());
432 setCurrentReferer(url
);
434 Progress pgGetChapters
= new Progress();
435 pg
.addProgress(pgGetChapters
, 10);
436 story
.setChapters(new ArrayList
<Chapter
>());
437 List
<Entry
<String
, URL
>> chapters
= getChapters(url
, getInput(),
439 if (!pgGetChapters
.isDone()) {
440 pgGetChapters
.setProgress(pgGetChapters
.getMax()); // 20%
443 if (chapters
!= null) {
444 Progress pgChaps
= new Progress("Extracting chapters", 0,
445 chapters
.size() * 300);
446 pg
.addProgress(pgChaps
, 80);
450 for (Entry
<String
, URL
> chap
: chapters
) {
451 pgChaps
.setName("Extracting chapter " + i
);
452 setCurrentReferer(chap
.getValue());
453 InputStream chapIn
= Instance
.getCache().open(
454 chap
.getValue(), this, true);
455 pgChaps
.setProgress(i
* 100);
457 Progress pgGetChapterContent
= new Progress();
458 Progress pgMakeChapter
= new Progress();
459 pgChaps
.addProgress(pgGetChapterContent
, 100);
460 pgChaps
.addProgress(pgMakeChapter
, 100);
462 String content
= getChapterContent(url
, chapIn
, i
,
463 pgGetChapterContent
);
464 if (!pgGetChapterContent
.isDone()) {
465 pgGetChapterContent
.setProgress(pgGetChapterContent
469 Chapter cc
= makeChapter(url
, i
, chap
.getKey(),
470 content
, pgMakeChapter
);
471 if (!pgMakeChapter
.isDone()) {
472 pgMakeChapter
.setProgress(pgMakeChapter
.getMax());
475 words
+= cc
.getWords();
476 story
.getChapters().add(cc
);
477 if (story
.getMeta() != null) {
478 story
.getMeta().setWords(words
);
487 pgChaps
.setName("Extracting chapters");
497 } catch (IOException e
) {
505 setCurrentReferer(null);
514 public SupportType
getType() {
519 * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
520 * the current {@link URL} we work on.
522 * @return the referer
524 public URL
getCurrentReferer() {
525 return currentReferer
;
529 * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
530 * the current {@link URL} we work on.
532 * @param currentReferer
535 protected void setCurrentReferer(URL currentReferer
) {
536 this.currentReferer
= currentReferer
;
547 protected BasicSupport
setType(SupportType type
) {
553 * Prepare the support if needed before processing.
556 * the source of the story
558 * the input (the main resource)
560 * @throws IOException
563 protected void preprocess(URL source
, InputStream in
) throws IOException
{
567 * Now that we have processed the {@link Story}, close the resources if any.
569 * @throws IOException
572 protected void close() throws IOException
{
576 * Create a {@link Chapter} object from the given information, formatting
577 * the content as it should be.
584 * the chapter content
586 * the optional progress reporter
588 * @return the {@link Chapter}
590 * @throws IOException
591 * in case of I/O error
593 protected Chapter
makeChapter(URL source
, int number
, String name
,
594 String content
, Progress pg
) throws IOException
{
595 // Chapter name: process it correctly, then remove the possible
596 // redundant "Chapter x: " in front of it, or "-" (as in
597 // "Chapter 5: - Fun!" after the ": " was automatically added)
598 String chapterName
= processPara(name
).getContent().trim();
599 for (String lang
: Instance
.getConfig().getString(Config
.CHAPTER
)
601 String chapterWord
= Instance
.getConfig().getStringX(
602 Config
.CHAPTER
, lang
);
603 if (chapterName
.startsWith(chapterWord
)) {
604 chapterName
= chapterName
.substring(chapterWord
.length())
610 if (chapterName
.startsWith(Integer
.toString(number
))) {
611 chapterName
= chapterName
.substring(
612 Integer
.toString(number
).length()).trim();
615 while (chapterName
.startsWith(":") || chapterName
.startsWith("-")) {
616 chapterName
= chapterName
.substring(1).trim();
620 Chapter chap
= new Chapter(number
, chapterName
);
622 if (content
!= null) {
623 List
<Paragraph
> paras
= makeParagraphs(source
, content
, pg
);
625 for (Paragraph para
: paras
) {
626 words
+= para
.getWords();
628 chap
.setParagraphs(paras
);
629 chap
.setWords(words
);
637 * Convert the given content into {@link Paragraph}s.
640 * the source URL of the story
642 * the textual content
644 * the optional progress reporter
646 * @return the {@link Paragraph}s
648 * @throws IOException
649 * in case of I/O error
651 protected List
<Paragraph
> makeParagraphs(URL source
, String content
,
652 Progress pg
) throws IOException
{
658 // Special <HR> processing:
659 content
= content
.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
663 List
<Paragraph
> paras
= new ArrayList
<Paragraph
>();
665 if (content
!= null && !content
.trim().isEmpty()) {
667 String
[] tab
= content
.split("(<p>|</p>|<br>|<br/>)");
668 pg
.setMinMax(0, tab
.length
);
670 for (String line
: tab
) {
671 if (line
.startsWith("[") && line
.endsWith("]")) {
672 pg
.setName("Extracting image " + i
);
674 paras
.add(makeParagraph(source
, line
.trim()));
679 List
<String
> lines
= new ArrayList
<String
>();
680 BufferedReader buff
= null;
682 buff
= new BufferedReader(
683 new InputStreamReader(new ByteArrayInputStream(
684 content
.getBytes("UTF-8")), "UTF-8"));
685 for (String line
= buff
.readLine(); line
!= null; line
= buff
687 lines
.add(line
.trim());
695 pg
.setMinMax(0, lines
.size());
697 for (String line
: lines
) {
698 if (line
.startsWith("[") && line
.endsWith("]")) {
699 pg
.setName("Extracting image " + i
);
701 paras
.add(makeParagraph(source
, line
));
707 // Check quotes for "bad" format
708 List
<Paragraph
> newParas
= new ArrayList
<Paragraph
>();
709 for (Paragraph para
: paras
) {
710 newParas
.addAll(requotify(para
));
714 // Remove double blanks/brks
715 fixBlanksBreaks(paras
);
722 * Convert the given line into a single {@link Paragraph}.
725 * the source URL of the story
727 * the textual content of the paragraph
729 * @return the {@link Paragraph}
731 private Paragraph
makeParagraph(URL source
, String line
) {
733 if (line
.startsWith("[") && line
.endsWith("]")) {
734 image
= getImageUrl(this, source
,
735 line
.substring(1, line
.length() - 1).trim());
739 return new Paragraph(image
);
741 return processPara(line
);
746 * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
747 * those {@link Paragraph}s.
749 * The resulting list will not contain a starting or trailing blank/break
750 * nor 2 blanks or breaks following each other.
753 * the list of {@link Paragraph}s to fix
755 protected void fixBlanksBreaks(List
<Paragraph
> paras
) {
756 boolean space
= false;
758 for (int i
= 0; i
< paras
.size(); i
++) {
759 Paragraph para
= paras
.get(i
);
760 boolean thisSpace
= para
.getType() == ParagraphType
.BLANK
;
761 boolean thisBrk
= para
.getType() == ParagraphType
.BREAK
;
763 if (i
> 0 && space
&& thisBrk
) {
766 } else if ((space
|| brk
) && (thisSpace
|| thisBrk
)) {
775 // Remove blank/brk at start
777 && (paras
.get(0).getType() == ParagraphType
.BLANK
|| paras
.get(
778 0).getType() == ParagraphType
.BREAK
)) {
782 // Remove blank/brk at end
783 int last
= paras
.size() - 1;
785 && (paras
.get(last
).getType() == ParagraphType
.BLANK
|| paras
786 .get(last
).getType() == ParagraphType
.BREAK
)) {
792 * Get the default cover related to this subject (see <tt>.info</tt> files).
797 * @return the cover if any, or NULL
799 static BufferedImage
getDefaultCover(String subject
) {
800 if (subject
!= null && !subject
.isEmpty()
801 && Instance
.getCoverDir() != null) {
803 File fileCover
= new File(Instance
.getCoverDir(), subject
);
804 return getImage(null, fileCover
.toURI().toURL(), subject
);
805 } catch (MalformedURLException e
) {
813 * Return the list of supported image extensions.
815 * @param emptyAllowed
816 * TRUE to allow an empty extension in first position, which can be
817 * used when you may already have an extension in your input but
818 * are not sure about it
820 * @return the extensions
822 static String
[] getImageExt(boolean emptyAllowed
) {
824 return new String
[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
826 return new String
[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
831 * Check if the given resource can be a local image or a remote image, then
832 * refresh the cache with it if it is.
837 * the resource to check
839 * @return the image if found, or NULL
842 static BufferedImage
getImage(BasicSupport support
, URL source
, String line
) {
843 URL url
= getImageUrl(support
, source
, line
);
845 InputStream in
= null;
847 in
= Instance
.getCache().open(url
, getSupport(url
), true);
848 return IOUtils
.toImage(in
);
849 } catch (IOException e
) {
854 } catch (IOException e
) {
864 * Check if the given resource can be a local image or a remote image, then
865 * refresh the cache with it if it is.
870 * the resource to check
872 * @return the image URL if found, or NULL
875 static URL
getImageUrl(BasicSupport support
, URL source
, String line
) {
880 if (source
!= null) {
883 String relPath
= null;
884 String absPath
= null;
886 String path
= new File(source
.getFile()).getParent();
887 relPath
= new File(new File(path
), line
.trim())
889 } catch (Exception e
) {
890 // Cannot be converted to path (one possibility to take
891 // into account: absolute path on Windows)
894 absPath
= new File(line
.trim()).getAbsolutePath();
895 } catch (Exception e
) {
896 // Cannot be converted to path (at all)
899 for (String ext
: getImageExt(true)) {
900 if (absPath
!= null && new File(absPath
+ ext
).exists()) {
901 url
= new File(absPath
+ ext
).toURI().toURL();
902 } else if (relPath
!= null
903 && new File(relPath
+ ext
).exists()) {
904 url
= new File(relPath
+ ext
).toURI().toURL();
907 } catch (Exception e
) {
908 // Should not happen since we control the correct arguments
915 for (String ext
: getImageExt(true)) {
916 if (Instance
.getCache().check(new URL(line
+ ext
))) {
917 url
= new URL(line
+ ext
);
924 for (String ext
: getImageExt(true)) {
926 url
= new URL(line
+ ext
);
927 Instance
.getCache().refresh(url
, support
, true);
929 } catch (IOException e
) {
930 // no image with this ext
935 } catch (MalformedURLException e
) {
940 // refresh the cached file
943 Instance
.getCache().refresh(url
, support
, true);
944 } catch (IOException e
) {
945 // woops, broken image
955 * Open the input file that will be used through the support.
958 * the source {@link URL}
960 * @return the {@link InputStream}
962 * @throws IOException
963 * in case of I/O error
965 protected InputStream
openInput(URL source
) throws IOException
{
966 return Instance
.getCache().open(source
, this, false);
970 * Reset the given {@link InputStream} and return it.
973 * the {@link InputStream} to reset
975 * @return the same {@link InputStream} after reset
977 protected InputStream
reset(InputStream in
) {
980 } catch (IOException e
) {
986 * Reset then return {@link BasicSupport#in}.
988 * @return {@link BasicSupport#in}
990 protected InputStream
getInput() {
995 * Fix the author name if it is prefixed with some "by" {@link String}.
998 * the author with a possible prefix
1000 * @return the author without prefixes
1002 protected String
fixAuthor(String author
) {
1003 if (author
!= null) {
1004 for (String suffix
: new String
[] { " ", ":" }) {
1005 for (String byString
: Instance
.getConfig()
1006 .getString(Config
.BYS
).split(",")) {
1008 if (author
.toUpperCase().startsWith(byString
.toUpperCase())) {
1009 author
= author
.substring(byString
.length()).trim();
1014 // Special case (without suffix):
1015 if (author
.startsWith("©")) {
1016 author
= author
.substring(1);
1024 * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
1025 * and requotify them (i.e., separate them into QUOTE paragraphs and other
1026 * paragraphs (quotes or not)).
1029 * the paragraph to requotify (not necessarily a quote)
1031 * @return the correctly (or so we hope) quotified paragraphs
1033 protected List
<Paragraph
> requotify(Paragraph para
) {
1034 List
<Paragraph
> newParas
= new ArrayList
<Paragraph
>();
1036 if (para
.getType() == ParagraphType
.QUOTE
1037 && para
.getContent().length() > 2) {
1038 String line
= para
.getContent();
1039 boolean singleQ
= line
.startsWith("" + openQuote
);
1040 boolean doubleQ
= line
.startsWith("" + openDoubleQuote
);
1042 // Do not try when more than one quote at a time
1043 // (some stories are not easily readable if we do)
1045 && line
.indexOf(closeQuote
, 1) < line
1046 .lastIndexOf(closeQuote
)) {
1051 && line
.indexOf(closeDoubleQuote
, 1) < line
1052 .lastIndexOf(closeDoubleQuote
)) {
1058 if (!singleQ
&& !doubleQ
) {
1059 line
= openDoubleQuote
+ line
+ closeDoubleQuote
;
1060 newParas
.add(new Paragraph(ParagraphType
.QUOTE
, line
, para
1063 char open
= singleQ ? openQuote
: openDoubleQuote
;
1064 char close
= singleQ ? closeQuote
: closeDoubleQuote
;
1067 boolean inQuote
= false;
1069 for (char car
: line
.toCharArray()) {
1072 } else if (car
== close
) {
1074 } else if (car
== '.' && !inQuote
) {
1082 String rest
= line
.substring(posDot
+ 1).trim();
1083 line
= line
.substring(0, posDot
+ 1).trim();
1085 for (char car
: line
.toCharArray()) {
1090 newParas
.add(new Paragraph(ParagraphType
.QUOTE
, line
, words
));
1091 if (!rest
.isEmpty()) {
1092 newParas
.addAll(requotify(processPara(rest
)));
1106 * Process a {@link Paragraph} from a raw line of text.
1108 * Will also fix quotes and HTML encoding if needed.
1113 * @return the processed {@link Paragraph}
1115 protected Paragraph
processPara(String line
) {
1116 line
= ifUnhtml(line
).trim();
1118 boolean space
= true;
1120 boolean quote
= false;
1121 boolean tentativeCloseQuote
= false;
1126 StringBuilder builder
= new StringBuilder();
1127 for (char car
: line
.toCharArray()) {
1129 if (dashCount
> 0) {
1130 // dash, ndash and mdash: - – —
1131 // currently: a single dash stays '-', several dashes become an mdash
1132 builder
.append(dashCount
== 1 ?
'-' : '—');
1137 if (tentativeCloseQuote
) {
1138 tentativeCloseQuote
= false;
1139 if (Character
.isLetterOrDigit(car
)) {
1140 builder
.append("'");
1142 // handle double-single quotes as double quotes
1144 builder
.append(closeDoubleQuote
);
1147 builder
.append(closeQuote
);
1153 case ' ': // note: unbreakable space
1156 case '\n': // just in case
1157 case '\r': // just in case
1158 if (builder
.length() > 0
1159 && builder
.charAt(builder
.length() - 1) != ' ') {
1162 builder
.append(' ');
1166 if (space
|| (brk
&& quote
)) {
1168 // handle double-single quotes as double quotes
1170 builder
.deleteCharAt(builder
.length() - 1);
1171 builder
.append(openDoubleQuote
);
1173 builder
.append(openQuote
);
1175 } else if (prev
== ' ' || prev
== car
) {
1176 // handle double-single quotes as double quotes
1178 builder
.deleteCharAt(builder
.length() - 1);
1179 builder
.append(openDoubleQuote
);
1181 builder
.append(openQuote
);
1184 // it is a quote ("I'm off") or a 'quote' ("This
1185 // 'good' restaurant"...)
1186 tentativeCloseQuote
= true;
1191 if (space
|| (brk
&& quote
)) {
1193 builder
.append(openDoubleQuote
);
1194 } else if (prev
== ' ') {
1195 builder
.append(openDoubleQuote
);
1197 builder
.append(closeDoubleQuote
);
1222 builder
.append(car
);
1231 if (space
|| (brk
&& quote
)) {
1233 builder
.append(openQuote
);
1235 // handle double-single quotes as double quotes
1237 builder
.deleteCharAt(builder
.length() - 1);
1238 builder
.append(openDoubleQuote
);
1240 builder
.append(openQuote
);
1254 // handle double-single quotes as double quotes
1256 builder
.deleteCharAt(builder
.length() - 1);
1257 builder
.append(closeDoubleQuote
);
1259 builder
.append(closeQuote
);
1268 if (space
|| (brk
&& quote
)) {
1270 builder
.append(openDoubleQuote
);
1272 builder
.append(openDoubleQuote
);
1285 builder
.append(closeDoubleQuote
);
1291 builder
.append(car
);
1298 if (tentativeCloseQuote
) {
1299 tentativeCloseQuote
= false;
1300 builder
.append(closeQuote
);
1303 line
= builder
.toString().trim();
1305 ParagraphType type
= ParagraphType
.NORMAL
;
1307 type
= ParagraphType
.BLANK
;
1309 type
= ParagraphType
.BREAK
;
1311 type
= ParagraphType
.QUOTE
;
1314 return new Paragraph(type
, line
, words
);
1318 * Remove the HTML from the input <b>if</b> {@link BasicSupport#isHtml()} is
1324 * @return the no html version if needed
1326 private String
ifUnhtml(String input
) {
1327 if (isHtml() && input
!= null) {
1328 return StringUtils
.unhtml(input
);
1335 * Return a {@link BasicSupport} implementation supporting the given
1336 * resource if possible.
1339 * the story resource
1341 * @return an implementation that supports it, or NULL
1343 public static BasicSupport
getSupport(URL url
) {
1348 // TEXT and INFO_TEXT always support files (not URLs though)
1349 for (SupportType type
: SupportType
.values()) {
1350 if (type
!= SupportType
.TEXT
&& type
!= SupportType
.INFO_TEXT
) {
1351 BasicSupport support
= getSupport(type
);
1352 if (support
!= null && support
.supports(url
)) {
1358 for (SupportType type
: new SupportType
[] { SupportType
.INFO_TEXT
,
1359 SupportType
.TEXT
}) {
1360 BasicSupport support
= getSupport(type
);
1361 if (support
!= null && support
.supports(url
)) {
1370 * Return a {@link BasicSupport} implementation supporting the given type.
1375 * @return an implementation that supports it, or NULL
1377 public static BasicSupport
getSupport(SupportType type
) {
1380 return new Epub().setType(type
);
1382 return new InfoText().setType(type
);
1384 return new Fimfiction().setType(type
);
1386 return new Fanfiction().setType(type
);
1388 return new Text().setType(type
);
1390 return new MangaFox().setType(type
);
1392 return new E621().setType(type
);
1394 return new YiffStar().setType(type
);
1396 return new EHentai().setType(type
);
1398 return new Cbz().setType(type
);
1400 return new Html().setType(type
);
1407 * Return the first line from the given input which corresponds to the given
1413 * a string that must be found inside the target line (also
1414 * supports "^" at start to say "only if it starts with" the
1416 * @param relativeLine
1417 * the line to return based upon the target line position (-1 =
1418 * the line before, 0 = the target line...)
1422 static String
getLine(InputStream in
, String needle
, int relativeLine
) {
1423 return getLine(in
, needle
, relativeLine
, true);
1427 * Return a line from the given input which corresponds to the given
1433 * a string that must be found inside the target line (also
1434 * supports "^" at start to say "only if it starts with" the
1436 * @param relativeLine
1437 * the line to return based upon the target line position (-1 =
1438 * the line before, 0 = the target line...)
1440 * takes the first result (as opposed to the last one, which will
1441 * also always spend the input)
1445 static String
getLine(InputStream in
, String needle
, int relativeLine
,
1451 } catch (IOException e
) {
1455 List
<String
> lines
= new ArrayList
<String
>();
1456 @SuppressWarnings("resource")
1457 Scanner scan
= new Scanner(in
, "UTF-8");
1459 scan
.useDelimiter("\\n");
1460 while (scan
.hasNext()) {
1461 lines
.add(scan
.next());
1464 if (needle
.startsWith("^")) {
1465 if (lines
.get(lines
.size() - 1).startsWith(
1466 needle
.substring(1))) {
1467 index
= lines
.size() - 1;
1471 if (lines
.get(lines
.size() - 1).contains(needle
)) {
1472 index
= lines
.size() - 1;
1477 if (index
>= 0 && index
+ relativeLine
< lines
.size()) {
1478 rep
= lines
.get(index
+ relativeLine
);
1489 * Return the text between the key and the endKey (an optional subKey can
1490 * be passed, in this case we will look for the key first, then take the
1491 * text between the subKey and the endKey).
1493 * Will only match the first line with the given key if more than one are
1494 * possible. Which also means that if the subKey or endKey is not found on
1495 * that line, NULL will be returned.
1500 * the key to match (also supports "^" at start to say
1501 * "only if it starts with" the key)
1503 * the sub key or NULL if none
1505 * the end key or NULL for "up to the end"
1506 * @return the text or NULL if not found
1508 static String
getKeyLine(InputStream in
, String key
, String subKey
,
1510 String result
= null;
1512 String line
= getLine(in
, key
, 0);
1513 if (line
!= null && line
.contains(key
)) {
1514 line
= line
.substring(line
.indexOf(key
) + key
.length());
1515 if (subKey
== null || subKey
.isEmpty() || line
.contains(subKey
)) {
1516 if (subKey
!= null) {
1517 line
= line
.substring(line
.indexOf(subKey
)
1520 if (endKey
== null || line
.contains(endKey
)) {
1521 if (endKey
!= null) {
1522 line
= line
.substring(0, line
.indexOf(endKey
));