X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Ffanfix%2Fsupported%2FBasicSupport.java;h=c35ed86b65b564d4e905c9c635ded5804bd038e4;hb=7445f8565be9e9237ffb3e16fd4dcb61f8c36cd5;hp=c0419aa46c4c2d61986f2742cb3099fbcbab3840;hpb=f0608ab10e762c1aed9608720b97c1901b5e2614;p=fanfix.git diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport.java b/src/be/nikiroo/fanfix/supported/BasicSupport.java index c0419aa..c35ed86 100644 --- a/src/be/nikiroo/fanfix/supported/BasicSupport.java +++ b/src/be/nikiroo/fanfix/supported/BasicSupport.java @@ -1,13 +1,7 @@ package be.nikiroo.fanfix.supported; -import java.awt.image.BufferedImage; -import java.io.BufferedReader; -import java.io.ByteArrayInputStream; -import java.io.File; import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; -import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.Date; @@ -15,17 +9,17 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; -import java.util.Scanner; + +import org.jsoup.helper.DataUtil; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.nodes.Node; import be.nikiroo.fanfix.Instance; -import be.nikiroo.fanfix.bundles.Config; import be.nikiroo.fanfix.bundles.StringId; import be.nikiroo.fanfix.data.Chapter; import be.nikiroo.fanfix.data.MetaData; -import be.nikiroo.fanfix.data.Paragraph; -import be.nikiroo.fanfix.data.Paragraph.ParagraphType; import be.nikiroo.fanfix.data.Story; -import be.nikiroo.utils.IOUtils; import be.nikiroo.utils.Progress; import be.nikiroo.utils.StringUtils; @@ -39,134 +33,11 @@ import be.nikiroo.utils.StringUtils; * @author niki */ public abstract class BasicSupport { - /** - * The supported input types for which we can get a {@link BasicSupport} - * object. 
- * - * @author niki - */ - public enum SupportType { - /** EPUB files created with this program */ - EPUB, - /** Pure text file with some rules */ - TEXT, - /** TEXT but with associated .info file */ - INFO_TEXT, - /** My Little Pony fanfictions */ - FIMFICTION, - /** Fanfictions from a lot of different universes */ - FANFICTION, - /** Website with lots of Mangas */ - MANGAFOX, - /** Furry website with comics support */ - E621, - /** Furry website with stories */ - YIFFSTAR, - /** Comics and images groups, mostly but not only NSFW */ - E_HENTAI, - /** CBZ files */ - CBZ, - /** HTML files */ - HTML; - - /** - * A description of this support type (more information than the - * {@link BasicSupport#getSourceName()}). - * - * @return the description - */ - public String getDesc() { - String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC, - this.name()); - - if (desc == null) { - desc = Instance.getTrans().getString(StringId.INPUT_DESC, this); - } - - return desc; - } - - /** - * The name of this support type (a short version). - * - * @return the name - */ - public String getSourceName() { - BasicSupport support = BasicSupport.getSupport(this); - if (support != null) { - return support.getSourceName(); - } - - return null; - } - - @Override - public String toString() { - return super.toString().toLowerCase(); - } - - /** - * Call {@link SupportType#valueOf(String.toUpperCase())}. - * - * @param typeName - * the possible type name - * - * @return NULL or the type - */ - public static SupportType valueOfUC(String typeName) { - return SupportType.valueOf(typeName == null ? null : typeName - .toUpperCase()); - } - - /** - * Call {@link SupportType#valueOf(String.toUpperCase())} but return - * NULL for NULL instead of raising exception. 
- * - * @param typeName - * the possible type name - * - * @return NULL or the type - */ - public static SupportType valueOfNullOkUC(String typeName) { - if (typeName == null) { - return null; - } - - return SupportType.valueOfUC(typeName); - } - - /** - * Call {@link SupportType#valueOf(String.toUpperCase())} but return - * NULL in case of error instead of raising an exception. - * - * @param typeName - * the possible type name - * - * @return NULL or the type - */ - public static SupportType valueOfAllOkUC(String typeName) { - try { - return SupportType.valueOfUC(typeName); - } catch (Exception e) { - return null; - } - } - } - - private InputStream in; + private Document sourceNode; + private URL source; private SupportType type; private URL currentReferer; // with only one 'r', as in 'HTTP'... - // quote chars - private char openQuote = Instance.getTrans().getCharacter( - StringId.OPEN_SINGLE_QUOTE); - private char closeQuote = Instance.getTrans().getCharacter( - StringId.CLOSE_SINGLE_QUOTE); - private char openDoubleQuote = Instance.getTrans().getCharacter( - StringId.OPEN_DOUBLE_QUOTE); - private char closeDoubleQuote = Instance.getTrans().getCharacter( - StringId.CLOSE_DOUBLE_QUOTE); - /** * The name of this support class. * @@ -192,51 +63,48 @@ public abstract class BasicSupport { */ protected abstract boolean isHtml(); - protected abstract MetaData getMeta(URL source, InputStream in) - throws IOException; + /** + * Return the {@link MetaData} of this story. + * + * @return the associated {@link MetaData}, never NULL + * + * @throws IOException + * in case of I/O error + */ + protected abstract MetaData getMeta() throws IOException; /** * Return the story description. 
* - * @param source - * the source of the story - * @param in - * the input (the main resource) - * * @return the description * * @throws IOException * in case of I/O error */ - protected abstract String getDesc(URL source, InputStream in) - throws IOException; + protected abstract String getDesc() throws IOException; /** - * Return the list of chapters (name and resource). + * Return the list of chapters (name and resource). * + *
+ * Can be NULL if this {@link BasicSupport} does not use chapters.
*
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
* @param pg
* the optional progress reporter
*
- * @return the chapters
+ * @return the chapters or NULL
*
* @throws IOException
* in case of I/O error
*/
- protected abstract List
+ * Can return NULL, in which case you are supposed to work without a source
+ * node.
+ *
+ * @param source
* the source {@link URL}
*
- * @return the canonical form of this {@link URL}
+ * @return the {@link Document}
*
* @throws IOException
* in case of I/O error
*/
- public URL getCanonicalUrl(URL source) throws IOException {
- return source;
+ protected Document loadDocument(URL source) throws IOException {
+ String url = getCanonicalUrl(source).toString();
+ return DataUtil.load(Instance.getCache().open(source, this, false),
+ "UTF-8", url.toString());
+ }
+
+ /**
+ * Log into the support (can be a no-op depending upon the support).
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ protected void login() throws IOException {
+ }
+
+ /**
+ * Prepare the support if needed before processing.
+ *
+ * @throws IOException
+ * on I/O error
+ */
+ protected void preprocess() throws IOException {
+ }
+
+ /**
+ * Now that we have processed the {@link Story}, close the resources if any.
+ */
+ protected void close() {
+ setCurrentReferer(null);
}
/**
* Process the given story resource into a partially filled {@link Story}
* object containing the name and metadata, except for the description.
*
- * @param url
- * the story resource
- *
* @return the {@link Story}
*
* @throws IOException
* in case of I/O error
*/
- public Story processMeta(URL url) throws IOException {
- return processMeta(url, true, false, null);
+ public Story processMeta() throws IOException {
+ Story story = null;
+
+ preprocess();
+ try {
+ story = processMeta(false, null);
+ } finally {
+ close();
+ }
+
+ return story;
}
/**
* Process the given story resource into a partially filled {@link Story}
* object containing the name and metadata.
*
- * @param url
- * the story resource
- *
- * @param close
- * close "this" and "in" when done
+ * @param getDesc
+ * retrieve the description of the story, or not
* @param pg
* the optional progress reporter
*
- * @return the {@link Story}
+ * @return the {@link Story}, never NULL
*
* @throws IOException
* in case of I/O error
*/
- protected Story processMeta(URL url, boolean close, boolean getDesc,
- Progress pg) throws IOException {
+ protected Story processMeta(boolean getDesc, Progress pg)
+ throws IOException {
if (pg == null) {
pg = new Progress();
} else {
pg.setMinMax(0, 100);
}
- login();
- pg.setProgress(10);
-
- url = getCanonicalUrl(url);
+ pg.setProgress(30);
- setCurrentReferer(url);
-
- in = openInput(url);
- if (in == null) {
- return null;
+ Story story = new Story();
+ MetaData meta = getMeta();
+ if (meta.getCreationDate() == null || meta.getCreationDate().isEmpty()) {
+ meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
}
+ story.setMeta(meta);
- try {
- preprocess(url, getInput());
- pg.setProgress(30);
-
- Story story = new Story();
- MetaData meta = getMeta(url, getInput());
- if (meta.getCreationDate() == null
- || meta.getCreationDate().isEmpty()) {
- meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
- }
- story.setMeta(meta);
-
- pg.setProgress(50);
-
- if (meta != null && meta.getCover() == null) {
- meta.setCover(getDefaultCover(meta.getSubject()));
- }
-
- pg.setProgress(60);
-
- if (getDesc) {
- String descChapterName = Instance.getTrans().getString(
- StringId.DESCRIPTION);
- story.getMeta().setResume(
- makeChapter(url, 0, descChapterName,
- getDesc(url, getInput()), null));
- }
+ pg.setProgress(50);
- pg.setProgress(100);
- return story;
- } finally {
- if (close) {
- try {
- close();
- } catch (IOException e) {
- Instance.syserr(e);
- }
+ if (meta.getCover() == null) {
+ meta.setCover(BasicSupportHelper.getDefaultCover(meta.getSubject()));
+ }
- if (in != null) {
- in.close();
- }
- }
+ pg.setProgress(60);
- setCurrentReferer(null);
+ if (getDesc) {
+ String descChapterName = Instance.getTrans().getString(
+ StringId.DESCRIPTION);
+ story.getMeta().setResume(
+ BasicSupportPara.makeChapter(this, source, 0,
+ descChapterName, //
+ getDesc(), isHtml(), null));
}
+
+ pg.setProgress(100);
+ return story;
}
/**
* Process the given story resource into a fully filled {@link Story}
* object.
*
- * @param url
- * the story resource
* @param pg
* the optional progress reporter
*
- * @return the {@link Story}
+ * @return the {@link Story}, never NULL
*
* @throws IOException
* in case of I/O error
*/
- public Story process(URL url, Progress pg) throws IOException {
+ public Story process(Progress pg) throws IOException {
if (pg == null) {
pg = new Progress();
} else {
pg.setMinMax(0, 100);
}
- url = getCanonicalUrl(url);
+ setCurrentReferer(source);
+ login();
+ sourceNode = loadDocument(source);
+
pg.setProgress(1);
try {
Progress pgMeta = new Progress();
pg.addProgress(pgMeta, 10);
- Story story = processMeta(url, false, true, pgMeta);
+ preprocess();
+ Story story = processMeta(true, pgMeta);
if (!pgMeta.isDone()) {
pgMeta.setProgress(pgMeta.getMax()); // 10%
}
- if (story == null) {
- pg.setProgress(90);
- return null;
- }
-
pg.setName("Retrieving " + story.getMeta().getTitle());
- setCurrentReferer(url);
-
Progress pgGetChapters = new Progress();
pg.addProgress(pgGetChapters, 10);
story.setChapters(new ArrayList |
- * The resulting list will not contain a starting or trailing blank/break
- * nor 2 blanks or breaks following each other.
- *
- * @param paras
- * the list of {@link Paragraph}s to fix
- */
- protected void fixBlanksBreaks(List
- * Will also fix quotes and HTML encoding if needed.
- *
- * @param line
- * the raw line
- *
- * @return the processed {@link Paragraph}
- */
- protected Paragraph processPara(String line) {
- line = ifUnhtml(line).trim();
-
- boolean space = true;
- boolean brk = true;
- boolean quote = false;
- boolean tentativeCloseQuote = false;
- char prev = '\0';
- int dashCount = 0;
- long words = 1;
-
- StringBuilder builder = new StringBuilder();
- for (char car : line.toCharArray()) {
- if (car != '-') {
- if (dashCount > 0) {
- // dash, ndash and mdash: - â â
- // currently: always use mdash
- builder.append(dashCount == 1 ? '-' : 'â');
- }
- dashCount = 0;
- }
-
- if (tentativeCloseQuote) {
- tentativeCloseQuote = false;
- if (Character.isLetterOrDigit(car)) {
- builder.append("'");
- } else {
- // handle double-single quotes as double quotes
- if (prev == car) {
- builder.append(closeDoubleQuote);
- continue;
- } else {
- builder.append(closeQuote);
- }
- }
- }
-
- switch (car) {
- case 'Â ': // note: unbreakable space
- case ' ':
- case '\t':
- case '\n': // just in case
- case '\r': // just in case
- if (builder.length() > 0
- && builder.charAt(builder.length() - 1) != ' ') {
- words++;
- }
- builder.append(' ');
- break;
-
- case '\'':
- if (space || (brk && quote)) {
- quote = true;
- // handle double-single quotes as double quotes
- if (prev == car) {
- builder.deleteCharAt(builder.length() - 1);
- builder.append(openDoubleQuote);
- } else {
- builder.append(openQuote);
- }
- } else if (prev == ' ' || prev == car) {
- // handle double-single quotes as double quotes
- if (prev == car) {
- builder.deleteCharAt(builder.length() - 1);
- builder.append(openDoubleQuote);
- } else {
- builder.append(openQuote);
- }
- } else {
- // it is a quote ("I'm off") or a 'quote' ("This
- // 'good' restaurant"...)
- tentativeCloseQuote = true;
- }
- break;
-
- case '"':
- if (space || (brk && quote)) {
- quote = true;
- builder.append(openDoubleQuote);
- } else if (prev == ' ') {
- builder.append(openDoubleQuote);
- } else {
- builder.append(closeDoubleQuote);
- }
- break;
-
- case '-':
- if (space) {
- quote = true;
- } else {
- dashCount++;
- }
- space = false;
- break;
-
- case '*':
- case '~':
- case '/':
- case '\\':
- case '<':
- case '>':
- case '=':
- case '+':
- case '_':
- case 'â':
- case 'â':
- space = false;
- builder.append(car);
- break;
-
- case 'â':
- case '`':
- case 'â¹':
- case 'ï¹':
- case 'ã':
- case 'ã':
- if (space || (brk && quote)) {
- quote = true;
- builder.append(openQuote);
- } else {
- // handle double-single quotes as double quotes
- if (prev == car) {
- builder.deleteCharAt(builder.length() - 1);
- builder.append(openDoubleQuote);
- } else {
- builder.append(openQuote);
- }
- }
- space = false;
- brk = false;
- break;
-
- case 'â':
- case 'âº':
- case 'ï¹':
- case 'ã':
- case 'ã':
- space = false;
- brk = false;
- // handle double-single quotes as double quotes
- if (prev == car) {
- builder.deleteCharAt(builder.length() - 1);
- builder.append(closeDoubleQuote);
- } else {
- builder.append(closeQuote);
- }
- break;
-
- case '«':
- case 'â':
- case 'ï¹':
- case 'ã':
- case 'ã':
- if (space || (brk && quote)) {
- quote = true;
- builder.append(openDoubleQuote);
- } else {
- builder.append(openDoubleQuote);
- }
- space = false;
- brk = false;
- break;
-
- case '»':
- case 'â':
- case 'ï¹':
- case 'ã':
- case 'ã':
- space = false;
- brk = false;
- builder.append(closeDoubleQuote);
- break;
-
- default:
- space = false;
- brk = false;
- builder.append(car);
- break;
- }
-
- prev = car;
- }
-
- if (tentativeCloseQuote) {
- tentativeCloseQuote = false;
- builder.append(closeQuote);
- }
-
- line = builder.toString().trim();
-
- ParagraphType type = ParagraphType.NORMAL;
- if (space) {
- type = ParagraphType.BLANK;
- } else if (brk) {
- type = ParagraphType.BREAK;
- } else if (quote) {
- type = ParagraphType.QUOTE;
- }
-
- return new Paragraph(type, line, words);
- }
-
- /**
- * Remove the HTML from the input if {@link BasicSupport#isHtml()} is
- * true.
- *
- * @param input
- * the input
- *
- * @return the no html version if needed
- */
- private String ifUnhtml(String input) {
- if (isHtml() && input != null) {
- return StringUtils.unhtml(input);
- }
-
- return input;
- }
-
- /**
- * Return a {@link BasicSupport} implementation supporting the given
- * resource if possible.
- *
- * @param url
- * the story resource
- *
- * @return an implementation that supports it, or NULL
- */
- public static BasicSupport getSupport(URL url) {
- if (url == null) {
- return null;
- }
-
- // TEXT and INFO_TEXT always support files (not URLs though)
- for (SupportType type : SupportType.values()) {
- if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
- BasicSupport support = getSupport(type);
- if (support != null && support.supports(url)) {
- return support;
- }
- }
- }
-
- for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
- SupportType.TEXT }) {
- BasicSupport support = getSupport(type);
- if (support != null && support.supports(url)) {
- return support;
- }
- }
-
- return null;
- }
-
- /**
- * Return a {@link BasicSupport} implementation supporting the given type.
- *
- * @param type
- * the type
- *
- * @return an implementation that supports it, or NULL
- */
- public static BasicSupport getSupport(SupportType type) {
switch (type) {
case EPUB:
- return new Epub().setType(type);
+ support = new Epub();
+ break;
case INFO_TEXT:
- return new InfoText().setType(type);
+ support = new InfoText();
+ break;
case FIMFICTION:
- return new Fimfiction().setType(type);
+ try {
+ // Can fail if no client key or NO in options
+ support = new FimfictionApi();
+ } catch (IOException e) {
+ support = new Fimfiction();
+ }
+ break;
case FANFICTION:
- return new Fanfiction().setType(type);
+ support = new Fanfiction();
+ break;
case TEXT:
- return new Text().setType(type);
+ support = new Text();
+ break;
case MANGAFOX:
- return new MangaFox().setType(type);
+ support = new MangaFox();
+ break;
case E621:
- return new E621().setType(type);
+ support = new E621();
+ break;
case YIFFSTAR:
- return new YiffStar().setType(type);
+ support = new YiffStar();
+ break;
case E_HENTAI:
- return new EHentai().setType(type);
+ support = new EHentai();
+ break;
case CBZ:
- return new Cbz().setType(type);
+ support = new Cbz();
+ break;
case HTML:
- return new Html().setType(type);
- }
-
- return null;
- }
-
- /**
- * Return the first line from the given input which correspond to the given
- * selectors.
- *
- * @param in
- * the input
- * @param needle
- * a string that must be found inside the target line (also
- * supports "^" at start to say "only if it starts with" the
- * needle)
- * @param relativeLine
- * the line to return based upon the target line position (-1 =
- * the line before, 0 = the target line...)
- *
- * @return the line
- */
- static String getLine(InputStream in, String needle, int relativeLine) {
- return getLine(in, needle, relativeLine, true);
- }
-
- /**
- * Return a line from the given input which correspond to the given
- * selectors.
- *
- * @param in
- * the input
- * @param needle
- * a string that must be found inside the target line (also
- * supports "^" at start to say "only if it starts with" the
- * needle)
- * @param relativeLine
- * the line to return based upon the target line position (-1 =
- * the line before, 0 = the target line...)
- * @param first
- * takes the first result (as opposed to the last one, which will
- * also always spend the input)
- *
- * @return the line
- */
- static String getLine(InputStream in, String needle, int relativeLine,
- boolean first) {
- String rep = null;
-
- try {
- in.reset();
- } catch (IOException e) {
- Instance.syserr(e);
+ support = new Html();
+ break;
}
- List
- * Will only match the first line with the given key if more than one are
- * possible. Which also means that if the subKey or endKey is not found on
- * that line, NULL will be returned.
- *
- * @param in
- * the input
- * @param key
- * the key to match
- * @param subKey
- * the sub key or NULL if none
- * @param endKey
- * the end key or NULL for "up to the end"
- * @return the text or NULL if not found
- */
- static String getKeyLine(InputStream in, String key, String subKey,
- String endKey) {
- String result = null;
-
- String line = getLine(in, key, 0);
- if (line != null && line.contains(key)) {
- line = line.substring(line.indexOf(key) + key.length());
- if (subKey == null || subKey.isEmpty() || line.contains(subKey)) {
- if (subKey != null) {
- line = line.substring(line.indexOf(subKey)
- + subKey.length());
- }
- if (endKey == null || line.contains(endKey)) {
- if (endKey != null) {
- line = line.substring(0, line.indexOf(endKey));
- result = line;
- }
- }
- }
+ if (support != null) {
+ support.setType(type);
+ support.source = support.getCanonicalUrl(url);
}
- return result;
+ return support;
}
}
processing:
- content = content.replaceAll("(
]*>)|(
)|(
)",
- "
* * *
");
- }
-
- List
|
)");
- pg.setMinMax(0, tab.length);
- int i = 1;
- for (String line : tab) {
- if (line.startsWith("[") && line.endsWith("]")) {
- pg.setName("Extracting image " + i);
- }
- paras.add(makeParagraph(source, line.trim()));
- pg.setProgress(i++);
- }
- pg.setName(null);
- } else {
- List