X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Ffanfix%2Fsupported%2FBasicSupport.java;h=56a3bb80cb6d51fe40a9c4d830e12cb275102d76;hb=2a5c763d4d754c23bd65ed61fb47e35b62bdd3aa;hp=e6089eb046aff22654778d60678c235ea75745ea;hpb=315f14ae3752d90c683a07fa20f1aa53f6010d6d;p=fanfix.git diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport.java b/src/be/nikiroo/fanfix/supported/BasicSupport.java deleted file mode 100644 index e6089eb..0000000 --- a/src/be/nikiroo/fanfix/supported/BasicSupport.java +++ /dev/null @@ -1,1652 +0,0 @@ -package be.nikiroo.fanfix.supported; - -import java.awt.image.BufferedImage; -import java.io.BufferedReader; -import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.net.MalformedURLException; -import java.net.URL; -import java.util.ArrayList; -import java.util.Date; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Scanner; - -import be.nikiroo.fanfix.Instance; -import be.nikiroo.fanfix.bundles.Config; -import be.nikiroo.fanfix.bundles.StringId; -import be.nikiroo.fanfix.data.Chapter; -import be.nikiroo.fanfix.data.MetaData; -import be.nikiroo.fanfix.data.Paragraph; -import be.nikiroo.fanfix.data.Paragraph.ParagraphType; -import be.nikiroo.fanfix.data.Story; -import be.nikiroo.utils.ImageUtils; -import be.nikiroo.utils.Progress; -import be.nikiroo.utils.StringUtils; - -/** - * This class is the base class used by the other support classes. It can be - * used outside of this package, and have static method that you can use to get - * access to the correct support class. - *
- * It will be used with 'resources' (usually web pages or files).
- *
- * @author niki
- */
-public abstract class BasicSupport {
- /**
-  * The supported input types for which we can get a {@link BasicSupport}
-  * object.
-  *
-  * @author niki
-  */
- public enum SupportType {
-     /** EPUB files created with this program */
-     EPUB,
-     /** Pure text file with some rules */
-     TEXT,
-     /** TEXT but with associated .info file */
-     INFO_TEXT,
-     /** My Little Pony fanfictions */
-     FIMFICTION,
-     /** Fanfictions from a lot of different universes */
-     FANFICTION,
-     /** Website with lots of Mangas */
-     MANGAFOX,
-     /** Furry website with comics support */
-     E621,
-     /** Furry website with stories */
-     YIFFSTAR,
-     /** Comics and images groups, mostly but not only NSFW */
-     E_HENTAI,
-     /** CBZ files */
-     CBZ,
-     /** HTML files */
-     HTML;
-
-     /**
-      * A description of this support type (more information than the
-      * {@link BasicSupport#getSourceName()}).
-      *
-      * The lookup is first tried with the constant name as the sub-key and,
-      * if that yields NULL, with the generic entry formatted with this
-      * value.
-      *
-      * @return the description
-      */
-     public String getDesc() {
-         String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
-                 this.name());
-
-         if (desc == null) {
-             desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
-         }
-
-         return desc;
-     }
-
-     /**
-      * The name of this support type (a short version).
-      *
-      * @return the name, or NULL if no support exists for this type
-      */
-     public String getSourceName() {
-         BasicSupport support = BasicSupport.getSupport(this);
-         if (support != null) {
-             return support.getSourceName();
-         }
-
-         return null;
-     }
-
-     /**
-      * The constant name in lower case.
-      *
-      * The conversion is locale-independent so that names containing an
-      * 'I' give the same result whatever the default locale is.
-      */
-     @Override
-     public String toString() {
-         return super.toString().toLowerCase(java.util.Locale.ROOT);
-     }
-
-     /**
-      * Call {@link SupportType#valueOf(String)} after a locale-independent
-      * conversion to upper case.
-      *
-      * @param typeName
-      *            the possible type name
-      *
-      * @return the type
-      *
-      * @throws NullPointerException
-      *             if the name is NULL
-      * @throws IllegalArgumentException
-      *             if the name does not match any type
-      */
-     public static SupportType valueOfUC(String typeName) {
-         // Locale.ROOT: the default locale must not change how 'i' is
-         // upper-cased, or names such as "info_text" would never match
-         return SupportType.valueOf(typeName == null ? null : typeName
-                 .toUpperCase(java.util.Locale.ROOT));
-     }
-
-     /**
-      * Call {@link SupportType#valueOf(String)} after conversion to upper
-      * case but return NULL for NULL instead of raising exception.
-      *
-      * @param typeName
-      *            the possible type name
-      *
-      * @return NULL or the type
-      *
-      * @throws IllegalArgumentException
-      *             if the (non NULL) name does not match any type
-      */
-     public static SupportType valueOfNullOkUC(String typeName) {
-         if (typeName == null) {
-             return null;
-         }
-
-         return SupportType.valueOfUC(typeName);
-     }
-
-     /**
-      * Call {@link SupportType#valueOf(String)} after conversion to upper
-      * case but return NULL in case of error instead of raising an
-      * exception.
-      *
-      * @param typeName
-      *            the possible type name
-      *
-      * @return NULL or the type
-      */
-     public static SupportType valueOfAllOkUC(String typeName) {
-         try {
-             return SupportType.valueOfUC(typeName);
-         } catch (Exception e) {
-             // NULL or unknown name: both are reported as "no type"
-             return null;
-         }
-     }
- }
-
- // The stream handed out (after a reset) by getInput()
- private InputStream in;
- // NOTE(review): presumably the SupportType given to setType() by
- // getSupport(SupportType) -- the setter is not visible here, confirm
- private SupportType type;
- private URL currentReferer; // with only one 'r', as in 'HTTP'...
-
- // quote chars
- // (read from the translation resources; processPara() appends them in
- // place of the raw quote characters found in the text)
- private char openQuote = Instance.getTrans().getCharacter(
- StringId.OPEN_SINGLE_QUOTE);
- private char closeQuote = Instance.getTrans().getCharacter(
- StringId.CLOSE_SINGLE_QUOTE);
- private char openDoubleQuote = Instance.getTrans().getCharacter(
- StringId.OPEN_DOUBLE_QUOTE);
- private char closeDoubleQuote = Instance.getTrans().getCharacter(
- StringId.CLOSE_DOUBLE_QUOTE);
-
- /**
- * The name of this support class.
- *
- * @return the name
- */
- protected abstract String getSourceName();
-
- /**
- * Check if the given resource is supported by this {@link BasicSupport}.
- *
- * @param url
- * the resource to check for
- *
- * @return TRUE if it is
- */
- protected abstract boolean supports(URL url);
-
- /**
- * Return TRUE if the support will return HTML encoded content values for
- * the chapters content.
- *
- * @return TRUE for HTML
- */
- protected abstract boolean isHtml();
-
- /**
- * Return the {@link MetaData} of this story.
- *
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
- *
- * @return the associated {@link MetaData}
- *
- * @throws IOException
- * in case of I/O error
- */
- protected abstract MetaData getMeta(URL source, InputStream in)
- throws IOException;
-
- /**
- * Return the story description.
- *
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
- *
- * @return the description
- *
- * @throws IOException
- * in case of I/O error
- */
- protected abstract String getDesc(URL source, InputStream in)
- throws IOException;
-
- /**
- * Return the list of chapters (name and resource).
- *
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
- * @param pg
- * the optional progress reporter
- *
- * @return the chapters
- *
- * @throws IOException
- * in case of I/O error
- */
- protected abstract List
- * You are expected to call the super method implementation if you override
- * it.
- *
- * @return the cookies
- */
- public Map |
- * The resulting list will not contain a starting or trailing blank/break
- * nor 2 blanks or breaks following each other.
- *
- * @param paras
- * the list of {@link Paragraph}s to fix
- */
- protected void fixBlanksBreaks(List
- * Can return NULL, in which case you are supposed to work without an
- * {@link InputStream}.
- *
- * @param source
- * the source {@link URL}
- *
- * @return the {@link InputStream}
- *
- * @throws IOException
- * in case of I/O error
- */
- protected InputStream openInput(URL source) throws IOException {
- return Instance.getCache().open(source, this, false);
- }
-
- /**
- * Reset then return {@link BasicSupport#in}.
- *
- * @return {@link BasicSupport#in}
- */
- protected InputStream getInput() {
- return reset(in);
- }
-
- /**
-  * Fix the author name if it is prefixed with some "by" {@link String} or
-  * with a copyright sign.
-  *
-  * The "by" prefixes come from the comma-separated {@link Config#BYS}
-  * setting; each one is tried followed by a space, then followed by a
-  * colon, and is compared without regard to case.
-  *
-  * @param author
-  *            the author with a possible prefix (can be NULL)
-  *
-  * @return the author without prefixes, or NULL if the input was NULL
-  */
- protected String fixAuthor(String author) {
-     if (author == null) {
-         return null;
-     }
-
-     String bys = Instance.getConfig().getString(Config.BYS);
-     if (bys != null) {
-         String[] byStrings = bys.split(",");
-         for (String suffix : new String[] { " ", ":" }) {
-             for (String byString : byStrings) {
-                 String prefix = byString + suffix;
-                 // regionMatches compares in place: upper-casing both
-                 // sides first is locale-dependent and can change the
-                 // length, which would make the cut below land at the
-                 // wrong offset
-                 if (author.regionMatches(true, 0, prefix, 0,
-                         prefix.length())) {
-                     author = author.substring(prefix.length()).trim();
-                 }
-             }
-         }
-     }
-
-     // Special case (without suffix): the copyright sign, U+00A9
-     if (author.startsWith("\u00A9")) {
-         author = author.substring(1).trim();
-     }
-
-     return author;
- }
-
- /**
- * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
- * and requotify them (i.e., separate them into QUOTE paragraphs and other
- * paragraphs (quotes or not)).
- *
- * @param para
- * the paragraph to requotify (not necessarily a quote)
- *
- * @return the correctly (or so we hope) quotified paragraphs
- */
- protected List
- * Will also fix quotes and HTML encoding if needed.
- *
- * @param line
- * the raw line
- *
- * @return the processed {@link Paragraph}
- */
- protected Paragraph processPara(String line) {
-     // Overview: the line is scanned once, character by character. Raw
-     // quote characters are replaced by the configured ones, runs of
-     // dashes are merged, every kind of white space becomes a plain
-     // space and the words are counted. The flags left after the scan
-     // decide the paragraph type (BLANK, BREAK, QUOTE or NORMAL).
-     //
-     // All the non-ASCII characters are written as Unicode escapes so
-     // that the file encoding cannot garble them.
-     // NOTE(review): the code points were reconstructed from mis-encoded
-     // literals, using their open/close grouping -- confirm them
-     line = ifUnhtml(line).trim();
-
-     boolean space = true; // nothing but white space/quotes seen so far
-     boolean brk = true; // no "real" character seen so far
-     boolean quote = false;
-     boolean tentativeCloseQuote = false;
-     char prev = '\0';
-     int dashCount = 0;
-     long words = 1;
-
-     StringBuilder builder = new StringBuilder();
-     for (char car : line.toCharArray()) {
-         if (car != '-') {
-             if (dashCount > 0) {
-                 // dash, ndash and mdash: '-', U+2013, U+2014
-                 // currently: always use mdash
-                 builder.append(dashCount == 1 ? '-' : '\u2014');
-             }
-             dashCount = 0;
-         }
-
-         if (tentativeCloseQuote) {
-             tentativeCloseQuote = false;
-             if (Character.isLetterOrDigit(car)) {
-                 // an apostrophe inside a word
-                 builder.append("'");
-             } else {
-                 // handle double-single quotes as double quotes
-                 if (prev == car) {
-                     builder.append(closeDoubleQuote);
-                     // note: 'prev' is deliberately left untouched here
-                     continue;
-                 }
-
-                 builder.append(closeQuote);
-             }
-         }
-
-         switch (car) {
-         case '\u00A0': // note: unbreakable space
-         case ' ':
-         case '\t':
-         case '\n': // just in case
-         case '\r': // just in case
-             if (builder.length() > 0
-                     && builder.charAt(builder.length() - 1) != ' ') {
-                 words++;
-             }
-             builder.append(' ');
-             break;
-
-         case '\'':
-             if (space || (brk && quote)) {
-                 quote = true;
-                 // handle double-single quotes as double quotes
-                 if (prev == car) {
-                     builder.deleteCharAt(builder.length() - 1);
-                     builder.append(openDoubleQuote);
-                 } else {
-                     builder.append(openQuote);
-                 }
-             } else if (prev == ' ' || prev == car) {
-                 // handle double-single quotes as double quotes
-                 if (prev == car) {
-                     builder.deleteCharAt(builder.length() - 1);
-                     builder.append(openDoubleQuote);
-                 } else {
-                     builder.append(openQuote);
-                 }
-             } else {
-                 // it is a quote ("I'm off") or a 'quote' ("This
-                 // 'good' restaurant"...)
-                 tentativeCloseQuote = true;
-             }
-             break;
-
-         case '"':
-             if (space || (brk && quote)) {
-                 quote = true;
-                 builder.append(openDoubleQuote);
-             } else if (prev == ' ') {
-                 builder.append(openDoubleQuote);
-             } else {
-                 builder.append(closeDoubleQuote);
-             }
-             break;
-
-         case '-':
-             if (space) {
-                 // a leading dash marks a dialogue line
-                 quote = true;
-             } else {
-                 dashCount++;
-             }
-             space = false;
-             break;
-
-         case '*':
-         case '~':
-         case '/':
-         case '\\':
-         case '<':
-         case '>':
-         case '=':
-         case '+':
-         case '_':
-         case '\u2013': // en dash
-         case '\u2014': // em dash
-             // decoration characters: 'brk' is left as it is
-             space = false;
-             builder.append(car);
-             break;
-
-         case '\u2018': // left single quotation mark
-         case '`':
-         case '\u2039': // single left-pointing angle quotation mark
-         case '\uFE41': // vertical left corner bracket
-         case '\u3008': // left angle bracket
-         case '\u300C': // left corner bracket
-             if (space || (brk && quote)) {
-                 quote = true;
-                 builder.append(openQuote);
-             } else {
-                 // handle double-single quotes as double quotes
-                 if (prev == car) {
-                     builder.deleteCharAt(builder.length() - 1);
-                     builder.append(openDoubleQuote);
-                 } else {
-                     builder.append(openQuote);
-                 }
-             }
-             space = false;
-             brk = false;
-             break;
-
-         case '\u2019': // right single quotation mark
-         case '\u203A': // single right-pointing angle quotation mark
-         case '\uFE42': // vertical right corner bracket
-         case '\u3009': // right angle bracket
-         case '\u300D': // right corner bracket
-             space = false;
-             brk = false;
-             // handle double-single quotes as double quotes
-             if (prev == car) {
-                 builder.deleteCharAt(builder.length() - 1);
-                 builder.append(closeDoubleQuote);
-             } else {
-                 builder.append(closeQuote);
-             }
-             break;
-
-         case '\u00AB': // left-pointing double angle quotation mark
-         case '\u201C': // left double quotation mark
-         case '\uFE43': // vertical left white corner bracket
-         case '\u300A': // left double angle bracket
-         case '\u300E': // left white corner bracket
-             if (space || (brk && quote)) {
-                 quote = true;
-                 builder.append(openDoubleQuote);
-             } else {
-                 builder.append(openDoubleQuote);
-             }
-             space = false;
-             brk = false;
-             break;
-
-         case '\u00BB': // right-pointing double angle quotation mark
-         case '\u201D': // right double quotation mark
-         case '\uFE44': // vertical right white corner bracket
-         case '\u300B': // right double angle bracket
-         case '\u300F': // right white corner bracket
-             space = false;
-             brk = false;
-             builder.append(closeDoubleQuote);
-             break;
-
-         default:
-             space = false;
-             brk = false;
-             builder.append(car);
-             break;
-         }
-
-         prev = car;
-     }
-
-     if (tentativeCloseQuote) {
-         // the line ended on a single quote: it can only be a closing one
-         tentativeCloseQuote = false;
-         builder.append(closeQuote);
-     }
-
-     line = builder.toString().trim();
-
-     ParagraphType type = ParagraphType.NORMAL;
-     if (space) {
-         type = ParagraphType.BLANK;
-     } else if (brk) {
-         type = ParagraphType.BREAK;
-     } else if (quote) {
-         type = ParagraphType.QUOTE;
-     }
-
-     return new Paragraph(type, line, words);
- }
-
- /**
-  * Strip the HTML from the input, but only when
-  * {@link BasicSupport#isHtml()} says that this support works with HTML.
-  *
-  * @param input
-  *            the input (can be NULL)
-  *
-  * @return the input itself when there is nothing to do, its no html
-  *         version otherwise
-  */
- private String ifUnhtml(String input) {
-     // isHtml() is asked first, as before
-     if (!isHtml() || input == null) {
-         return input;
-     }
-
-     return StringUtils.unhtml(input);
- }
-
- /**
- * Return a {@link BasicSupport} implementation supporting the given
- * resource if possible.
- *
- * @param url
- * the story resource
- *
- * @return an implementation that supports it, or NULL
- */
- public static BasicSupport getSupport(URL url) {
- if (url == null) {
- return null;
- }
-
- // TEXT and INFO_TEXT always support files (not URLs though)
- for (SupportType type : SupportType.values()) {
- if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
- BasicSupport support = getSupport(type);
- if (support != null && support.supports(url)) {
- return support;
- }
- }
- }
-
- for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
- SupportType.TEXT }) {
- BasicSupport support = getSupport(type);
- if (support != null && support.supports(url)) {
- return support;
- }
- }
-
- return null;
- }
-
- /**
-  * Return a {@link BasicSupport} implementation supporting the given type.
-  *
-  * A new instance is created on each call, and its type is set to the
-  * requested one.
-  *
-  * @param type
-  *            the type (can be NULL)
-  *
-  * @return an implementation that supports it, or NULL (always NULL for a
-  *         NULL type)
-  */
- public static BasicSupport getSupport(SupportType type) {
-     if (type == null) {
-         // a switch on a NULL enum value would raise a
-         // NullPointerException
-         return null;
-     }
-
-     switch (type) {
-     case EPUB:
-         return new Epub().setType(type);
-     case INFO_TEXT:
-         return new InfoText().setType(type);
-     case FIMFICTION:
-         try {
-             // Can fail if no client key or NO in options
-             return new FimfictionApi().setType(type);
-         } catch (IOException e) {
-             // fall back on the non-API implementation
-             return new Fimfiction().setType(type);
-         }
-     case FANFICTION:
-         return new Fanfiction().setType(type);
-     case TEXT:
-         return new Text().setType(type);
-     case MANGAFOX:
-         return new MangaFox().setType(type);
-     case E621:
-         return new E621().setType(type);
-     case YIFFSTAR:
-         return new YiffStar().setType(type);
-     case E_HENTAI:
-         return new EHentai().setType(type);
-     case CBZ:
-         return new Cbz().setType(type);
-     case HTML:
-         return new Html().setType(type);
-     }
-
-     return null;
- }
-
- /**
-  * Try to rewind the given {@link InputStream}, then return it.
-  *
-  * This is a best effort: a stream that refuses the reset is returned as
-  * it is, without any error.
-  *
-  * @param in
-  *            the {@link InputStream} to reset (can be NULL)
-  *
-  * @return the same {@link InputStream} after the reset attempt
-  */
- static protected InputStream reset(InputStream in) {
-     if (in == null) {
-         return in;
-     }
-
-     try {
-         in.reset();
-     } catch (IOException ignored) {
-         // the stream cannot be rewound: hand it back untouched
-     }
-
-     return in;
- }
-
- /**
- * Return the first line from the given input which correspond to the given
- * selectors.
- *
- * @param in
- * the input
- * @param needle
- * a string that must be found inside the target line (also
- * supports "^" at start to say "only if it starts with" the
- * needle)
- * @param relativeLine
- * the line to return based upon the target line position (-1 =
- * the line before, 0 = the target line...)
- *
- * @return the line
- */
- static protected String getLine(InputStream in, String needle,
- int relativeLine) {
- return getLine(in, needle, relativeLine, true);
- }
-
- /**
- * Return a line from the given input which correspond to the given
- * selectors.
- *
- * @param in
- * the input
- * @param needle
- * a string that must be found inside the target line (also
- * supports "^" at start to say "only if it starts with" the
- * needle)
- * @param relativeLine
- * the line to return based upon the target line position (-1 =
- * the line before, 0 = the target line...)
- * @param first
- * takes the first result (as opposed to the last one, which will
- * also always spend the input)
- *
- * @return the line
- */
- static protected String getLine(InputStream in, String needle,
- int relativeLine, boolean first) {
- String rep = null;
-
- reset(in);
-
- List
- * Will only match the first line with the given key if more than one are
- * possible. Which also means that if the subKey or endKey is not found on
- * that line, NULL will be returned.
- *
- * @param in
- * the input
- * @param key
- * the key to match (also supports "^" at start to say
- * "only if it starts with" the key)
- * @param subKey
- * the sub key or NULL if none
- * @param endKey
- * the end key or NULL for "up to the end"
- * @return the text or NULL if not found
- */
- static protected String getKeyLine(InputStream in, String key,
- String subKey, String endKey) {
- return getKeyText(getLine(in, key, 0), key, subKey, endKey);
- }
-
- /**
-  * Return the text between the key and the endKey (and optional subKey can
-  * be passed, in this case we will look for the key first, then take the
-  * text between the subKey and the endKey).
-  *
-  * @param in
-  *            the input (can be NULL)
-  * @param key
-  *            the key to match (also supports "^" at start to say
-  *            "only if it starts with" the key; a key that is found as it
-  *            is in the input is always taken literally first)
-  * @param subKey
-  *            the sub key or NULL (or empty) if none
-  * @param endKey
-  *            the end key or NULL for "up to the end"
-  *
-  * @return the text or NULL if not found (which includes a NULL input or
-  *         key)
-  */
- static protected String getKeyText(String in, String key, String subKey,
-         String endKey) {
-     if (in == null || key == null) {
-         return null;
-     }
-
-     // Locate the key: literally first, then as a "^" anchored key
-     int keyPos = in.indexOf(key);
-     int keyLength = key.length();
-     if (keyPos < 0 && key.startsWith("^")
-             && in.startsWith(key.substring(1))) {
-         keyPos = 0;
-         keyLength = key.length() - 1;
-     }
-     if (keyPos < 0) {
-         return null;
-     }
-
-     String line = in.substring(keyPos + keyLength);
-
-     if (subKey != null && !subKey.isEmpty()) {
-         int subPos = line.indexOf(subKey);
-         if (subPos < 0) {
-             return null;
-         }
-         line = line.substring(subPos + subKey.length());
-     }
-
-     // A NULL endKey means "up to the end": the remaining text is the
-     // result (it used to be lost, NULL was returned instead)
-     if (endKey != null) {
-         int endPos = line.indexOf(endKey);
-         if (endPos < 0) {
-             return null;
-         }
-         line = line.substring(0, endPos);
-     }
-
-     return line;
- }
-
- /**
-  * Return the text between the key and the endKey, looking only at what
-  * comes after the given sub-keys (which must all be found, one after the
-  * other, before the key/endKey are searched).
-  *
-  * @param in
-  *            the input
-  * @param key
-  *            the key to match
-  * @param endKey
-  *            the end key or NULL for "up to the end"
-  * @param afters
-  *            the sub-keys to find before checking for key/endKey
-  *
-  * @return the text or NULL if not found
-  */
- static protected String getKeyTextAfter(String in, String key,
-         String endKey, String... afters) {
-     String remainder = in;
-
-     // a NULL or empty input is passed as it is to getKeyText
-     boolean hasContent = in != null && !in.isEmpty();
-     if (hasContent) {
-         int start = indexOfAfter(in, 0, afters);
-         if (start < 0) {
-             return null;
-         }
-
-         remainder = in.substring(start);
-     }
-
-     return getKeyText(remainder, key, null, endKey);
- }
-
- /**
-  * Return the first index after all the given "afters" have been found in
-  * the {@link String}, or -1 if it was not possible.
-  *
-  * The sub-keys are searched one after the other, each search starting
-  * right after the previous match. NULL and empty sub-keys are skipped.
-  *
-  * @param in
-  *            the input
-  * @param startAt
-  *            start at this position in the string
-  * @param afters
-  *            the sub-keys to find, in order (can be NULL or empty, in
-  *            which case the start position is returned)
-  *
-  * @return the index right after the last sub-key, or -1 if the input is
-  *         NULL or empty or if a sub-key was not found (a negative start
-  *         position is returned as it is)
-  */
- static protected int indexOfAfter(String in, int startAt, String... afters) {
-     if (in == null || in.isEmpty()) {
-         return -1;
-     }
-
-     int pos = startAt;
-     if (afters != null) {
-         for (String subKey : afters) {
-             if (pos < 0) {
-                 // a previous sub-key was not found: give up
-                 break;
-             }
-
-             if (subKey == null || subKey.isEmpty()) {
-                 continue;
-             }
-
-             pos = in.indexOf(subKey, pos);
-             if (pos >= 0) {
-                 pos += subKey.length();
-             }
-         }
-     }
-
-     return pos;
- }
-}
processing:
- content = content.replaceAll("(
]*>)|(
)|(
)",
- "
* * *
");
- }
-
- List
|
)");
- pg.setMinMax(0, tab.length);
- int i = 1;
- for (String line : tab) {
- if (line.startsWith("[") && line.endsWith("]")) {
- pg.setName("Extracting image " + i);
- }
- paras.add(makeParagraph(source, line.trim()));
- pg.setProgress(i++);
- }
- pg.setName(null);
- } else {
- List