X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;ds=sidebyside;f=src%2Fbe%2Fnikiroo%2Ffanfix%2Fsupported%2FBasicSupport.java;h=ba2164cacac6f077ab1f4d1c97e23b9d9a56f31d;hb=5788cc5b5040784e7505fc52f7d79baf99075869;hp=74f11156e82c265a0eee1ddbd09b7c417a98ed12;hpb=08fe2e33007063e30fe22dc1d290f8afaa18eb1d;p=fanfix.git diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport.java b/src/be/nikiroo/fanfix/supported/BasicSupport.java deleted file mode 100644 index 74f1115..0000000 --- a/src/be/nikiroo/fanfix/supported/BasicSupport.java +++ /dev/null @@ -1,1292 +0,0 @@ -package be.nikiroo.fanfix.supported; - -import java.io.ByteArrayInputStream; -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.net.MalformedURLException; -import java.net.URL; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Map.Entry; -import java.util.Scanner; - -import be.nikiroo.fanfix.Instance; -import be.nikiroo.fanfix.bundles.Config; -import be.nikiroo.fanfix.bundles.StringId; -import be.nikiroo.fanfix.data.Chapter; -import be.nikiroo.fanfix.data.MetaData; -import be.nikiroo.fanfix.data.Paragraph; -import be.nikiroo.fanfix.data.Story; -import be.nikiroo.fanfix.data.Paragraph.ParagraphType; -import be.nikiroo.utils.StringUtils; - -/** - * This class is the base class used by the other support classes. It can be - * used outside of this package, and have static method that you can use to get - * access to the correct support class. - *
- * It will be used with 'resources' (usually web pages or files). - * - * @author niki - */ -public abstract class BasicSupport { - /** - * The supported input types for which we can get a {@link BasicSupport} - * object. - * - * @author niki - */ - public enum SupportType { - /** EPUB files created with this program */ - EPUB, - /** Pure text file with some rules */ - TEXT, - /** TEXT but with associated .info file */ - INFO_TEXT, - /** My Little Pony fanfictions */ - FIMFICTION, - /** Fanfictions from a lot of different universes */ - FANFICTION, - /** Website with lots of Mangas */ - MANGAFOX, - /** Furry website with comics support */ - E621, - /** CBZ files */ - CBZ; - - /** - * A description of this support type (more information than the - * {@link BasicSupport#getSourceName()}). - * - * @return the description - */ - public String getDesc() { - String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC, - this.name()); - - if (desc == null) { - desc = Instance.getTrans().getString(StringId.INPUT_DESC, this); - } - - return desc; - } - - /** - * The name of this support type (a short version). - * - * @return the name - */ - public String getSourceName() { - BasicSupport support = BasicSupport.getSupport(this); - if (support != null) { - return support.getSourceName(); - } - - return null; - } - - @Override - public String toString() { - return super.toString().toLowerCase(); - } - - /** - * Call {@link SupportType#valueOf(String.toUpperCase())}. - * - * @param typeName - * the possible type name - * - * @return NULL or the type - */ - public static SupportType valueOfUC(String typeName) { - return SupportType.valueOf(typeName == null ? null : typeName - .toUpperCase()); - } - - /** - * Call {@link SupportType#valueOf(String.toUpperCase())} but return - * NULL for NULL instead of raising exception. 
- * - * @param typeName - * the possible type name - * - * @return NULL or the type - */ - public static SupportType valueOfNullOkUC(String typeName) { - if (typeName == null) { - return null; - } - - return SupportType.valueOfUC(typeName); - } - - /** - * Call {@link SupportType#valueOf(String.toUpperCase())} but return - * NULL in case of error instead of raising an exception. - * - * @param typeName - * the possible type name - * - * @return NULL or the type - */ - public static SupportType valueOfAllOkUC(String typeName) { - try { - return SupportType.valueOfUC(typeName); - } catch (Exception e) { - return null; - } - } - } - - /** Only used by {@link BasicSupport#getInput()} just so it is always reset. */ - private InputStream in; - private SupportType type; - private URL currentReferer; // with on 'r', as in 'HTTP'... - - // quote chars - private char openQuote = Instance.getTrans().getChar( - StringId.OPEN_SINGLE_QUOTE); - private char closeQuote = Instance.getTrans().getChar( - StringId.CLOSE_SINGLE_QUOTE); - private char openDoubleQuote = Instance.getTrans().getChar( - StringId.OPEN_DOUBLE_QUOTE); - private char closeDoubleQuote = Instance.getTrans().getChar( - StringId.CLOSE_DOUBLE_QUOTE); - - /** - * The name of this support class. - * - * @return the name - */ - protected abstract String getSourceName(); - - /** - * Check if the given resource is supported by this {@link BasicSupport}. - * - * @param url - * the resource to check for - * - * @return TRUE if it is - */ - protected abstract boolean supports(URL url); - - /** - * Return TRUE if the support will return HTML encoded content values for - * the chapters content. - * - * @return TRUE for HTML - */ - protected abstract boolean isHtml(); - - /** - * Return the story title. 
- * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the title - * - * @throws IOException - * in case of I/O error - */ - protected abstract String getTitle(URL source, InputStream in) - throws IOException; - - /** - * Return the story author. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the author - * - * @throws IOException - * in case of I/O error - */ - protected abstract String getAuthor(URL source, InputStream in) - throws IOException; - - /** - * Return the story publication date. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the date - * - * @throws IOException - * in case of I/O error - */ - protected abstract String getDate(URL source, InputStream in) - throws IOException; - - /** - * Return the subject of the story (for instance, if it is a fanfiction, - * what is the original work; if it is a technical text, what is the - * technical subject...). - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the subject - * - * @throws IOException - * in case of I/O error - */ - protected abstract String getSubject(URL source, InputStream in) - throws IOException; - - /** - * Return the story description. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the description - * - * @throws IOException - * in case of I/O error - */ - protected abstract String getDesc(URL source, InputStream in) - throws IOException; - - /** - * Return the story cover resource if any, or NULL if none. - *
- * The default cover should not be checked for here.
- *
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
- *
- * @return the cover or NULL
- *
- * @throws IOException
- * in case of I/O error
- */
- protected abstract URL getCover(URL source, InputStream in)
- throws IOException;
-
- /**
- * Return the list of chapters (name and resource).
- *
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
- *
- * @return the chapters
- *
- * @throws IOException
- * in case of I/O error
- */
- protected abstract List
- * You are expected to call the super method implementation if you override
- * it.
- *
- * @return the cookies
- */
- public Map
- * By default, this is the {@link URL} of the resource.
- *
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
- *
- * @return the uuid
- *
- * @throws IOException
- * in case of I/O error
- */
- protected String getUuid(URL source, InputStream in) throws IOException {
- return source.toString();
- }
-
- /**
- * Return the story Library UID, a unique value representing the story (it
- * is often a number) in the local library.
- *
- * By default, this is empty.
- *
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
- *
- * @return the id
- *
- * @throws IOException
- * in case of I/O error
- */
- protected String getLuid(URL source, InputStream in) throws IOException {
- return "";
- }
-
- /**
- * Return the 2-letter language code of this story.
- *
- * By default, this is 'EN'.
- *
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
- *
- * @return the language
- *
- * @throws IOException
- * in case of I/O error
- */
- protected String getLang(URL source, InputStream in) throws IOException {
- return "EN";
- }
-
- /**
- * Return the list of tags for this story.
- *
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
- *
- * @return the tags
- *
- * @throws IOException
- * in case of I/O error
- */
- protected List
- * Do not reset the input, which will be pointing at the line just after the
- * result (input will be spent if no result is found).
- *
- * @param in
- * the input
- * @param needle
- * a string that must be found inside the target line
- * @param relativeLine
- * the line to return based upon the target line position (-1 =
- * the line before, 0 = the target line...)
- *
- * @return the line
- */
- protected String getLine(InputStream in, String needle, int relativeLine) {
- return getLine(in, needle, relativeLine, true);
- }
-
- /**
- * Return a line from the given input which correspond to the given
- * selectors.
- *
- * Do not reset the input, which will be pointing at the line just after the
- * result (input will be spent if no result is found) when first is TRUE,
- * and will always be spent if first is FALSE.
- *
- * @param in
- * the input
- * @param needle
- * a string that must be found inside the target line
- * @param relativeLine
- * the line to return based upon the target line position (-1 =
- * the line before, 0 = the target line...)
- * @param first
- * takes the first result (as opposed to the last one, which will
- * also always spend the input)
- *
- * @return the line
- */
- protected String getLine(InputStream in, String needle, int relativeLine,
- boolean first) {
- String rep = null;
-
- List
- * Will also fix quotes and HTML encoding if needed.
- *
- * @param line
- * the raw line
- *
- * @return the processed {@link Paragraph}
- */
- private Paragraph processPara(String line) {
- // Normalise one raw line: whitespace becomes plain spaces, quote
- // characters become the configured typographic quotes, runs of '-'
- // become a dash, and the flags below decide the paragraph type.
- //
- // NOTE(review): the non-ASCII char literals of this method had been
- // mis-decoded (UTF-8 read as Latin-1), which produced duplicate case
- // labels and multi-character literals. They are restored here as
- // Unicode escapes from their byte patterns; literals that had collapsed
- // to the same visible glyph were told apart by the quote group they sit
- // in -- confirm against the repository blob.
- line = ifUnhtml(line).trim();
-
- // TRUE until something other than whitespace or a straight quote is seen
- boolean space = true;
- // TRUE until a typographic quote or a character without a dedicated
- // case is seen
- boolean brk = true;
- // set when a quote character or a '-' is met while the line still
- // qualifies as "opening" (see the conditions in the switch)
- boolean quote = false;
- // a straight ' was seen inside the text; the next character decides
- // whether it is an apostrophe or a closing quote
- boolean tentativeCloseQuote = false;
- char prev = '\0';
- // number of consecutive '-' not yet written to the output
- int dashCount = 0;
-
- StringBuilder builder = new StringBuilder();
- for (char car : line.toCharArray()) {
- if (car != '-') {
- if (dashCount > 0) {
- // dash, ndash and mdash: '-', U+2013, U+2014
- // currently: a single '-' is kept, a longer run becomes one
- // mdash
- builder.append(dashCount == 1 ? '-' : '\u2014');
- }
- dashCount = 0;
- }
-
- if (tentativeCloseQuote) {
- tentativeCloseQuote = false;
- if ((car >= 'a' && car <= 'z') || (car >= 'A' && car <= 'Z')
- || (car >= '0' && car <= '9')) {
- // followed by a letter or digit: it was an apostrophe
- builder.append("'");
- } else {
- builder.append(closeQuote);
- }
- }
-
- switch (car) {
- case '\u00A0': // note: unbreakable space (U+00A0)
- case ' ':
- case '\t':
- case '\n': // just in case
- case '\r': // just in case
- builder.append(' ');
- break;
-
- case '\'':
- if (space || (brk && quote)) {
- quote = true;
- builder.append(openQuote);
- } else if (prev == ' ') {
- builder.append(openQuote);
- } else {
- // it is a quote ("I'm off") or a 'quote' ("This
- // 'good' restaurant"...)
- tentativeCloseQuote = true;
- }
- break;
-
- case '"':
- if (space || (brk && quote)) {
- quote = true;
- builder.append(openDoubleQuote);
- } else if (prev == ' ') {
- builder.append(openDoubleQuote);
- } else {
- builder.append(closeDoubleQuote);
- }
- break;
-
- case '-':
- if (space) {
- // a '-' opening the line marks a quote and is not
- // written to the output
- quote = true;
- } else {
- dashCount++;
- }
- space = false;
- break;
-
- case '*':
- case '~':
- case '/':
- case '\\':
- case '<':
- case '>':
- case '=':
- case '+':
- case '_':
- case '\u2013': // U+2013 EN DASH
- case '\u2014': // U+2014 EM DASH
- // kept as-is; 'brk' is left untouched on purpose so that a
- // line made only of such symbols ends up as a BREAK
- space = false;
- builder.append(car);
- break;
-
- // opening single quotes
- case '\u2018': // U+2018 LEFT SINGLE QUOTATION MARK
- case '`':
- case '\u2039': // U+2039 SINGLE LEFT-POINTING ANGLE QUOTATION MARK
- case '\uFE41': // U+FE41 PRESENTATION FORM FOR VERTICAL LEFT CORNER BRACKET
- case '\u3008': // U+3008 LEFT ANGLE BRACKET
- case '\u300C': // U+300C LEFT CORNER BRACKET
- if (space || (brk && quote)) {
- quote = true;
- }
- builder.append(openQuote);
- space = false;
- brk = false;
- break;
-
- // closing single quotes
- case '\u2019': // U+2019 RIGHT SINGLE QUOTATION MARK
- case '\u203A': // U+203A SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
- case '\uFE42': // U+FE42 PRESENTATION FORM FOR VERTICAL RIGHT CORNER BRACKET
- case '\u3009': // U+3009 RIGHT ANGLE BRACKET
- case '\u300D': // U+300D RIGHT CORNER BRACKET
- space = false;
- brk = false;
- builder.append(closeQuote);
- break;
-
- // opening double quotes
- case '\u00AB': // U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
- case '\u201C': // U+201C LEFT DOUBLE QUOTATION MARK
- case '\uFE43': // U+FE43 PRESENTATION FORM FOR VERTICAL LEFT WHITE CORNER BRACKET
- case '\u300A': // U+300A LEFT DOUBLE ANGLE BRACKET
- case '\u300E': // U+300E LEFT WHITE CORNER BRACKET
- if (space || (brk && quote)) {
- quote = true;
- }
- builder.append(openDoubleQuote);
- space = false;
- brk = false;
- break;
-
- // closing double quotes
- case '\u00BB': // U+00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
- case '\u201D': // U+201D RIGHT DOUBLE QUOTATION MARK
- case '\uFE44': // U+FE44 PRESENTATION FORM FOR VERTICAL RIGHT WHITE CORNER BRACKET
- case '\u300B': // U+300B RIGHT DOUBLE ANGLE BRACKET
- case '\u300F': // U+300F RIGHT WHITE CORNER BRACKET
- space = false;
- brk = false;
- builder.append(closeDoubleQuote);
- break;
-
- default:
- space = false;
- brk = false;
- builder.append(car);
- break;
- }
-
- prev = car;
- }
-
- // a straight ' ending the line can only be a closing quote
- if (tentativeCloseQuote) {
- tentativeCloseQuote = false;
- builder.append(closeQuote);
- }
-
- // Dashes still counted in dashCount are not written out here, so a run
- // of '-' ending the line is left out of the text.
- // NOTE(review): confirm this is intended.
-
- line = builder.toString().trim();
-
- // The first flag still standing wins: BLANK, then BREAK, then QUOTE.
- ParagraphType type = ParagraphType.NORMAL;
- if (space) {
- type = ParagraphType.BLANK;
- } else if (brk) {
- type = ParagraphType.BREAK;
- } else if (quote) {
- type = ParagraphType.QUOTE;
- }
-
- return new Paragraph(type, line);
- }
-
- /**
- * Remove the HTML from the input when {@link BasicSupport#isHtml()} is
- * true; otherwise hand the input back untouched.
- *
- * @param input
- * the input (a NULL input is returned as-is)
- *
- * @return the no html version if needed
- */
- private String ifUnhtml(String input) {
- // isHtml() is evaluated first, and on every call, as it always was
- if (!isHtml() || input == null) {
- return input;
- }
-
- return StringUtils.unhtml(input);
- }
-
- /**
- * Return a {@link BasicSupport} implementation supporting the given
- * resource if possible.
- *
- * @param url
- * the story resource
- *
- * @return an implementation that supports it, or NULL
- */
- public static BasicSupport getSupport(URL url) {
- if (url == null) {
- return null;
- }
-
- // TEXT and INFO_TEXT always support files (not URLs though)
- for (SupportType type : SupportType.values()) {
- if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
- BasicSupport support = getSupport(type);
- if (support != null && support.supports(url)) {
- return support;
- }
- }
- }
-
- for (SupportType type : new SupportType[] { SupportType.TEXT,
- SupportType.INFO_TEXT }) {
- BasicSupport support = getSupport(type);
- if (support != null && support.supports(url)) {
- return support;
- }
- }
-
- return null;
- }
-
- /**
- * Return a {@link BasicSupport} implementation supporting the given type.
- * <p>
- * A new instance is created on each call and given the type through
- * setType.
- *
- * @param type
- * the type (may be NULL)
- *
- * @return an implementation that supports it, or NULL if the type is NULL
- * or has no implementation
- */
- public static BasicSupport getSupport(SupportType type) {
- // A switch on a NULL enum value throws a NullPointerException; answer
- // NULL instead, as documented and as getSupport(URL) does for a NULL
- // resource.
- if (type == null) {
- return null;
- }
-
- switch (type) {
- case EPUB:
- return new Epub().setType(type);
- case INFO_TEXT:
- return new InfoText().setType(type);
- case FIMFICTION:
- return new Fimfiction().setType(type);
- case FANFICTION:
- return new Fanfiction().setType(type);
- case TEXT:
- return new Text().setType(type);
- case MANGAFOX:
- return new MangaFox().setType(type);
- case E621:
- return new E621().setType(type);
- case CBZ:
- return new Cbz().setType(type);
- }
-
- // only reached if a SupportType is added without a case above
- return null;
- }
-}
processing:
- content = content.replaceAll("(
]*>)|(
)|(
)",
- "\n* * *\n");
- }
-
- InputStream in = new ByteArrayInputStream(
- content.getBytes(StandardCharsets.UTF_8));
- try {
- @SuppressWarnings("resource")
- Scanner scan = new Scanner(in, "UTF-8");
- scan.useDelimiter("(\\n|