X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Ffanfix%2Fsupported%2FBasicSupport.java;h=c35ed86b65b564d4e905c9c635ded5804bd038e4;hb=7445f8565be9e9237ffb3e16fd4dcb61f8c36cd5;hp=169cc5f02d7a0a85cc4328227bd646c408d5a8e1;hpb=373da363323d3a9263aa6ebd392ca3272b23b412;p=fanfix.git
diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport.java b/src/be/nikiroo/fanfix/supported/BasicSupport.java
index 169cc5f..c35ed86 100644
--- a/src/be/nikiroo/fanfix/supported/BasicSupport.java
+++ b/src/be/nikiroo/fanfix/supported/BasicSupport.java
@@ -1,30 +1,25 @@
 package be.nikiroo.fanfix.supported;
 
-import java.awt.image.BufferedImage;
-import java.io.BufferedReader;
-import java.io.ByteArrayInputStream;
-import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.net.MalformedURLException;
 import java.net.URL;
 import java.util.ArrayList;
+import java.util.Date;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.Map.Entry;
-import java.util.Scanner;
+
+import org.jsoup.helper.DataUtil;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
 
 import be.nikiroo.fanfix.Instance;
-import be.nikiroo.fanfix.bundles.Config;
 import be.nikiroo.fanfix.bundles.StringId;
 import be.nikiroo.fanfix.data.Chapter;
 import be.nikiroo.fanfix.data.MetaData;
-import be.nikiroo.fanfix.data.Paragraph;
-import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
 import be.nikiroo.fanfix.data.Story;
-import be.nikiroo.utils.IOUtils;
 import be.nikiroo.utils.Progress;
 import be.nikiroo.utils.StringUtils;
 
@@ -38,129 +33,10 @@ import be.nikiroo.utils.StringUtils;
  * @author niki
  */
 public abstract class BasicSupport {
- /**
- * The supported input types for which we can get a {@link BasicSupport}
- * object.
- *
- * @author niki
- */
- public enum SupportType {
- /** EPUB files created with this program */
- EPUB,
- /** Pure text file with some rules */
- TEXT,
- /** TEXT but with associated .info file */
- INFO_TEXT,
- /** My Little Pony fanfictions */
- FIMFICTION,
- /** Fanfictions from a lot of different universes */
- FANFICTION,
- /** Website with lots of Mangas */
- MANGAFOX,
- /** Furry website with comics support */
- E621,
- /** CBZ files */
- CBZ,
- /** HTML files */
- HTML;
-
- /**
- * A description of this support type (more information than the
- * {@link BasicSupport#getSourceName()}).
- *
- * @return the description
- */
- public String getDesc() {
- String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
- this.name());
-
- if (desc == null) {
- desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
- }
-
- return desc;
- }
-
- /**
- * The name of this support type (a short version).
- *
- * @return the name
- */
- public String getSourceName() {
- BasicSupport support = BasicSupport.getSupport(this);
- if (support != null) {
- return support.getSourceName();
- }
-
- return null;
- }
-
- @Override
- public String toString() {
- return super.toString().toLowerCase();
- }
-
- /**
- * Call {@link SupportType#valueOf(String.toUpperCase())}.
- *
- * @param typeName
- * the possible type name
- *
- * @return NULL or the type
- */
- public static SupportType valueOfUC(String typeName) {
- return SupportType.valueOf(typeName == null ? null : typeName
- .toUpperCase());
- }
-
- /**
- * Call {@link SupportType#valueOf(String.toUpperCase())} but return
- * NULL for NULL instead of raising exception.
- *
- * @param typeName
- * the possible type name
- *
- * @return NULL or the type
- */
- public static SupportType valueOfNullOkUC(String typeName) {
- if (typeName == null) {
- return null;
- }
-
- return SupportType.valueOfUC(typeName);
- }
-
- /**
- * Call {@link SupportType#valueOf(String.toUpperCase())} but return
- * NULL in case of error instead of raising an exception.
- *
- * @param typeName
- * the possible type name
- *
- * @return NULL or the type
- */
- public static SupportType valueOfAllOkUC(String typeName) {
- try {
- return SupportType.valueOfUC(typeName);
- } catch (Exception e) {
- return null;
- }
- }
- }
-
- private InputStream in;
+ private Document sourceNode;
+ private URL source;
 private SupportType type;
- private URL currentReferer; // with on 'r', as in 'HTTP'...
-
- // quote chars
- private char openQuote = Instance.getTrans().getChar(
- StringId.OPEN_SINGLE_QUOTE);
- private char closeQuote = Instance.getTrans().getChar(
- StringId.CLOSE_SINGLE_QUOTE);
- private char openDoubleQuote = Instance.getTrans().getChar(
- StringId.OPEN_DOUBLE_QUOTE);
- private char closeDoubleQuote = Instance.getTrans().getChar(
- StringId.CLOSE_DOUBLE_QUOTE);
+ private URL currentReferer; // with only one 'r', as in 'HTTP'...
 
 /**
  * The name of this support class.
@@ -187,59 +63,60 @@ public abstract class BasicSupport {
  */
 protected abstract boolean isHtml();
 
- protected abstract MetaData getMeta(URL source, InputStream in)
- throws IOException;
+ /**
+ * Return the {@link MetaData} of this story.
+ *
+ * @return the associated {@link MetaData}, never NULL
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ protected abstract MetaData getMeta() throws IOException;
 
 /**
  * Return the story description.
  *
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
- *
  * @return the description
  *
  * @throws IOException
  * in case of I/O error
  */
- protected abstract String getDesc(URL source, InputStream in)
- throws IOException;
+ protected abstract String getDesc() throws IOException;
 
 /**
- * Return the list of chapters (name and resource).
+ * Return the list of chapters (name and resource).
+ *
+ *
+ * Can be NULL if this {@link BasicSupport} does not use chapters.
*
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
+ * @param pg
+ * the optional progress reporter
*
- * @return the chapters
+ * @return the chapters or NULL
*
* @throws IOException
* in case of I/O error
*/
- protected abstract List
+ * Can return NULL, in which case you are supposed to work without a source
+ * node.
*
* @param source
- * the source of the story
- * @param in
- * the input (the main resource)
+ * the source {@link URL}
+ *
+ * @return the {@link Document} (can be NULL)
*
* @throws IOException
- * on I/O error
+ * in case of I/O error
*/
- protected void preprocess(URL source, InputStream in) throws IOException {
+ protected Document loadDocument(URL source) throws IOException {
+ String url = getCanonicalUrl(source).toString();
+ return DataUtil.load(Instance.getCache().open(source, this, false),
+ "UTF-8", url.toString());
}
/**
- * Now that we have processed the {@link Story}, close the resources if any.
+ * Log into the support (can be a no-op depending upon the support).
*
* @throws IOException
- * on I/O error
+ * in case of I/O error
*/
- protected void close() throws IOException {
+ protected void login() throws IOException {
}
/**
- * Create a {@link Chapter} object from the given information, formatting
- * the content as it should be.
- *
- * @param number
- * the chapter number
- * @param name
- * the chapter name
- * @param content
- * the chapter content
- *
- * @return the {@link Chapter}
+ * Prepare the support if needed before processing.
*
* @throws IOException
- * in case of I/O error
+ * on I/O error
*/
- protected Chapter makeChapter(URL source, int number, String name,
- String content) throws IOException {
- // Chapter name: process it correctly, then remove the possible
- // redundant "Chapter x: " in front of it
- String chapterName = processPara(name).getContent().trim();
- for (String lang : Instance.getConfig().getString(Config.CHAPTER)
- .split(",")) {
- String chapterWord = Instance.getConfig().getStringX(
- Config.CHAPTER, lang);
- if (chapterName.startsWith(chapterWord)) {
- chapterName = chapterName.substring(chapterWord.length())
- .trim();
- break;
- }
- }
-
- if (chapterName.startsWith(Integer.toString(number))) {
- chapterName = chapterName.substring(
- Integer.toString(number).length()).trim();
- }
-
- if (chapterName.startsWith(":")) {
- chapterName = chapterName.substring(1).trim();
- }
- //
-
- Chapter chap = new Chapter(number, chapterName);
-
- if (content != null) {
- chap.setParagraphs(makeParagraphs(source, content));
- }
-
- return chap;
+ protected void preprocess() throws IOException {
+ }
+ /**
+ * Now that we have processed the {@link Story}, close the resources if any.
+ */
+ protected void close() {
+ setCurrentReferer(null);
}
/**
- * Convert the given content into {@link Paragraph}s.
- *
- * @param source
- * the source URL of the story
- * @param content
- * the textual content
+ * Process the given story resource into a partially filled {@link Story}
+ * object containing the name and metadata, except for the description.
*
- * @return the {@link Paragraph}s
+ * @return the {@link Story}
*
* @throws IOException
* in case of I/O error
*/
- protected List |
- * The resulting list will not contain a starting or trailing blank/break
- * nor 2 blanks or breaks following each other.
- *
- * @param paras
- * the list of {@link Paragraph}s to fix
- */
- protected void fixBlanksBreaks(List
- * Will also fix quotes and HTML encoding if needed.
- *
- * @param line
- * the raw line
- *
- * @return the processed {@link Paragraph}
- */
- private Paragraph processPara(String line) {
- line = ifUnhtml(line).trim();
-
- boolean space = true;
- boolean brk = true;
- boolean quote = false;
- boolean tentativeCloseQuote = false;
- char prev = '\0';
- int dashCount = 0;
-
- StringBuilder builder = new StringBuilder();
- for (char car : line.toCharArray()) {
- if (car != '-') {
- if (dashCount > 0) {
- // dash, ndash and mdash: - – —
- // currently: always use mdash
- builder.append(dashCount == 1 ? '-' : 'â');
- }
- dashCount = 0;
- }
-
- if (tentativeCloseQuote) {
- tentativeCloseQuote = false;
- if ((car >= 'a' && car <= 'z') || (car >= 'A' && car <= 'Z')
- || (car >= '0' && car <= '9')) {
- builder.append("'");
- } else {
- builder.append(closeQuote);
- }
- }
-
- switch (car) {
- case 'Â ': // note: unbreakable space
- case ' ':
- case '\t':
- case '\n': // just in case
- case '\r': // just in case
- builder.append(' ');
- break;
-
- case '\'':
- if (space || (brk && quote)) {
- quote = true;
- builder.append(openQuote);
- } else if (prev == ' ') {
- builder.append(openQuote);
- } else {
- // it is a quote ("I'm off") or a 'quote' ("This
- // 'good' restaurant"...)
- tentativeCloseQuote = true;
- }
- break;
-
- case '"':
- if (space || (brk && quote)) {
- quote = true;
- builder.append(openDoubleQuote);
- } else if (prev == ' ') {
- builder.append(openDoubleQuote);
- } else {
- builder.append(closeDoubleQuote);
- }
- break;
-
- case '-':
- if (space) {
- quote = true;
- } else {
- dashCount++;
- }
- space = false;
- break;
-
- case '*':
- case '~':
- case '/':
- case '\\':
- case '<':
- case '>':
- case '=':
- case '+':
- case '_':
- case 'â':
- case 'â':
- space = false;
- builder.append(car);
- break;
-
- case 'â':
- case '`':
- case 'â¹':
- case 'ï¹':
- case 'ã':
- case 'ã':
- if (space || (brk && quote)) {
- quote = true;
- builder.append(openQuote);
- } else {
- builder.append(openQuote);
- }
- space = false;
- brk = false;
- break;
+ Chapter cc = BasicSupportPara.makeChapter(this, chapUrl, i,
+ chapName, content, isHtml(), pgMakeChapter);
+ if (!pgMakeChapter.isDone()) {
+ pgMakeChapter.setProgress(pgMakeChapter.getMax());
+ }
- case 'â':
- case 'âº':
- case 'ï¹':
- case 'ã':
- case 'ã':
- space = false;
- brk = false;
- builder.append(closeQuote);
- break;
+ words += cc.getWords();
+ story.getChapters().add(cc);
+ story.getMeta().setWords(words);
- case '«':
- case 'â':
- case 'ï¹':
- case 'ã':
- case 'ã':
- if (space || (brk && quote)) {
- quote = true;
- builder.append(openDoubleQuote);
- } else {
- builder.append(openDoubleQuote);
+ i++;
}
- space = false;
- brk = false;
- break;
- case '»':
- case 'â':
- case 'ï¹':
- case 'ã':
- case 'ã':
- space = false;
- brk = false;
- builder.append(closeDoubleQuote);
- break;
-
- default:
- space = false;
- brk = false;
- builder.append(car);
- break;
+ pgChaps.setName("Extracting chapters");
+ } else {
+ pg.setProgress(80);
}
- prev = car;
- }
-
- if (tentativeCloseQuote) {
- tentativeCloseQuote = false;
- builder.append(closeQuote);
- }
-
- line = builder.toString().trim();
-
- ParagraphType type = ParagraphType.NORMAL;
- if (space) {
- type = ParagraphType.BLANK;
- } else if (brk) {
- type = ParagraphType.BREAK;
- } else if (quote) {
- type = ParagraphType.QUOTE;
- }
-
- return new Paragraph(type, line);
- }
-
- /**
- * Remove the HTML from the inpit if {@link BasicSupport#isHtml()} is
- * true.
- *
- * @param input
- * the input
- *
- * @return the no html version if needed
- */
- private String ifUnhtml(String input) {
- if (isHtml() && input != null) {
- return StringUtils.unhtml(input);
+ return story;
+ } finally {
+ close();
}
-
- return input;
}
/**
@@ -1115,7 +442,7 @@ public abstract class BasicSupport {
// TEXT and INFO_TEXT always support files (not URLs though)
for (SupportType type : SupportType.values()) {
if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
- BasicSupport support = getSupport(type);
+ BasicSupport support = getSupport(type, url);
if (support != null && support.supports(url)) {
return support;
}
@@ -1124,7 +451,7 @@ public abstract class BasicSupport {
for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
SupportType.TEXT }) {
- BasicSupport support = getSupport(type);
+ BasicSupport support = getSupport(type, url);
if (support != null && support.supports(url)) {
return support;
}
@@ -1138,113 +465,61 @@ public abstract class BasicSupport {
*
* @param type
* the type
+ * @param url
+ * the {@link URL} to support (can be NULL to get an
+ * "abstract support")
*
* @return an implementation that supports it, or NULL
*/
- public static BasicSupport getSupport(SupportType type) {
+ public static BasicSupport getSupport(SupportType type, URL url) {
+ BasicSupport support = null;
+
switch (type) {
case EPUB:
- return new Epub().setType(type);
+ support = new Epub();
+ break;
case INFO_TEXT:
- return new InfoText().setType(type);
+ support = new InfoText();
+ break;
case FIMFICTION:
- return new Fimfiction().setType(type);
+ try {
+ // Can fail if no client key or NO in options
+ support = new FimfictionApi();
+ } catch (IOException e) {
+ support = new Fimfiction();
+ }
+ break;
case FANFICTION:
- return new Fanfiction().setType(type);
+ support = new Fanfiction();
+ break;
case TEXT:
- return new Text().setType(type);
+ support = new Text();
+ break;
case MANGAFOX:
- return new MangaFox().setType(type);
+ support = new MangaFox();
+ break;
case E621:
- return new E621().setType(type);
+ support = new E621();
+ break;
+ case YIFFSTAR:
+ support = new YiffStar();
+ break;
+ case E_HENTAI:
+ support = new EHentai();
+ break;
case CBZ:
- return new Cbz().setType(type);
+ support = new Cbz();
+ break;
case HTML:
- return new Html().setType(type);
+ support = new Html();
+ break;
}
- return null;
- }
-
- /**
- * Return the first line from the given input which correspond to the given
- * selectors.
- *
- * @param in
- * the input
- * @param needle
- * a string that must be found inside the target line (also
- * supports "^" at start to say "only if it starts with" the
- * needle)
- * @param relativeLine
- * the line to return based upon the target line position (-1 =
- * the line before, 0 = the target line...)
- *
- * @return the line
- */
- static String getLine(InputStream in, String needle, int relativeLine) {
- return getLine(in, needle, relativeLine, true);
- }
-
- /**
- * Return a line from the given input which correspond to the given
- * selectors.
- *
- * @param in
- * the input
- * @param needle
- * a string that must be found inside the target line (also
- * supports "^" at start to say "only if it starts with" the
- * needle)
- * @param relativeLine
- * the line to return based upon the target line position (-1 =
- * the line before, 0 = the target line...)
- * @param first
- * takes the first result (as opposed to the last one, which will
- * also always spend the input)
- *
- * @return the line
- */
- static String getLine(InputStream in, String needle, int relativeLine,
- boolean first) {
- String rep = null;
-
- try {
- in.reset();
- } catch (IOException e) {
- Instance.syserr(e);
- }
-
- List
processing:
- content = content.replaceAll("(
]*>)|(
)|(
)",
- "\n* * *\n");
- }
+ public Story processMeta() throws IOException {
+ Story story = null;
- List
|
|\\n)");
- } else {
- lines = new String[] { encodedLine };
- }
-
- for (String aline : lines) {
- String line = aline.trim();
-
- URL image = null;
- if (line.startsWith("[") && line.endsWith("]")) {
- image = getImageUrl(this, source,
- line.substring(1, line.length() - 1).trim());
- }
-
- if (image != null) {
- paras.add(new Paragraph(image));
- } else {
- paras.add(processPara(line));
- }
- }
- }
+ story = processMeta(false, null);
} finally {
- in.close();
- }
-
- // Check quotes for "bad" format
- List