From: Niki Roo Date: Sun, 18 Mar 2018 19:16:19 +0000 (+0100) Subject: Change BasicSupport to use jsoup X-Git-Tag: fanfix-swing-0.0.1~12^2~361 X-Git-Url: https://git.nikiroo.be/?a=commitdiff_plain;h=0ffa47548f474c1330d8d723300d9aa7a4894736;p=fanfix-swing.git Change BasicSupport to use jsoup --- diff --git a/README-fr.md b/README-fr.md index d45f6f35..59837e45 100644 --- a/README-fr.md +++ b/README-fr.md @@ -94,6 +94,7 @@ Quelques tests unitaires sont disponibles : Nécessaires : - libs/nikiroo-utils-sources.jar: quelques utilitaires partagés - [libs/unbescape-sources.jar](https://github.com/unbescape/unbescape): une librairie sympathique pour convertir du texte depuis/vers beaucoup de formats ; utilisée ici pour la partie HTML +- [libs/jsoup-sources.jar](https://jsoup.org/): une librairie pour parser du HTML Optionnelles : - [libs/jexer-sources.jar](https://github.com/klamonte/jexer): une petite librairie qui offre des widgets en mode TUI diff --git a/README.md b/README.md index eeb1a870..e2312686 100644 --- a/README.md +++ b/README.md @@ -94,6 +94,7 @@ There are some unit tests you can run, too: - libs/nikiroo-utils-sources.jar: some shared utility functions - [libs/unbescape-sources.jar](https://github.com/unbescape/unbescape): a nice library to escape/unescape a lot of text formats; used here for HTML - [libs/jexer-sources.jar](https://github.com/klamonte/jexer): a small library that offers TUI widgets +- [libs/jsoup-sources.jar](https://jsoup.org/): a library to parse HTML Nothing else but Java 1.6+. 
diff --git a/libs/jsoup-1.10.3-sources.jar b/libs/jsoup-1.10.3-sources.jar new file mode 100644 index 00000000..1fe0db4e Binary files /dev/null and b/libs/jsoup-1.10.3-sources.jar differ diff --git a/src/be/nikiroo/fanfix/Main.java b/src/be/nikiroo/fanfix/Main.java index 35454121..a61c5292 100644 --- a/src/be/nikiroo/fanfix/Main.java +++ b/src/be/nikiroo/fanfix/Main.java @@ -21,7 +21,7 @@ import be.nikiroo.fanfix.reader.BasicReader; import be.nikiroo.fanfix.reader.Reader; import be.nikiroo.fanfix.reader.Reader.ReaderType; import be.nikiroo.fanfix.supported.BasicSupport; -import be.nikiroo.fanfix.supported.BasicSupport.SupportType; +import be.nikiroo.fanfix.supported.SupportType; import be.nikiroo.utils.Progress; import be.nikiroo.utils.Version; import be.nikiroo.utils.serial.server.ServerObject; @@ -538,7 +538,7 @@ public class Main { pg.addProgress(pgOut, 1); } - Story story = support.process(source, pgIn); + Story story = support.process(pgIn); try { target = new File(target).getAbsolutePath(); BasicOutput.getOutput(type, infoCover, infoCover) diff --git a/src/be/nikiroo/fanfix/bundles/StringIdBundle.java b/src/be/nikiroo/fanfix/bundles/StringIdBundle.java index 24a9252d..0b129e49 100644 --- a/src/be/nikiroo/fanfix/bundles/StringIdBundle.java +++ b/src/be/nikiroo/fanfix/bundles/StringIdBundle.java @@ -35,7 +35,6 @@ public class StringIdBundle extends TransBundle { */ public Locale getLanguage() { return getLocaleFor(lang); - } /** diff --git a/src/be/nikiroo/fanfix/library/BasicLibrary.java b/src/be/nikiroo/fanfix/library/BasicLibrary.java index 8c86fc90..350a8caf 100644 --- a/src/be/nikiroo/fanfix/library/BasicLibrary.java +++ b/src/be/nikiroo/fanfix/library/BasicLibrary.java @@ -14,7 +14,7 @@ import be.nikiroo.fanfix.data.Story; import be.nikiroo.fanfix.output.BasicOutput; import be.nikiroo.fanfix.output.BasicOutput.OutputType; import be.nikiroo.fanfix.supported.BasicSupport; -import be.nikiroo.fanfix.supported.BasicSupport.SupportType; +import 
be.nikiroo.fanfix.supported.SupportType; import be.nikiroo.utils.Image; import be.nikiroo.utils.Progress; @@ -343,8 +343,8 @@ abstract public class BasicLibrary { .getType()); URL url = file.toURI().toURL(); if (type != null) { - story = BasicSupport.getSupport(type).process(url, - pgProcess); + story = BasicSupport.getSupport(type, url) // + .process(pgProcess); // Because we do not want to clear the meta cache: meta.setCover(story.getMeta().getCover()); story.setMeta(meta); @@ -392,7 +392,7 @@ abstract public class BasicLibrary { throw new UnknownHostException("" + url); } - return save(support.process(url, pg), null); + return save(support.process(pg), null); } /** diff --git a/src/be/nikiroo/fanfix/reader/BasicReader.java b/src/be/nikiroo/fanfix/reader/BasicReader.java index 74a99c40..9d5d5bb8 100644 --- a/src/be/nikiroo/fanfix/reader/BasicReader.java +++ b/src/be/nikiroo/fanfix/reader/BasicReader.java @@ -98,7 +98,7 @@ public abstract class BasicReader implements Reader { throw new IOException("URL not supported: " + source.toString()); } - story = support.process(source, pg); + story = support.process(pg); if (story == null) { throw new IOException( "Cannot retrieve story from external source: " diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport.java b/src/be/nikiroo/fanfix/supported/BasicSupport.java index 0c127aa2..4314b99c 100644 --- a/src/be/nikiroo/fanfix/supported/BasicSupport.java +++ b/src/be/nikiroo/fanfix/supported/BasicSupport.java @@ -1,12 +1,7 @@ package be.nikiroo.fanfix.supported; -import java.io.BufferedReader; -import java.io.ByteArrayInputStream; -import java.io.File; import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; -import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; import java.util.Date; @@ -14,17 +9,17 @@ import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; -import java.util.Scanner; + +import 
org.jsoup.helper.DataUtil; +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.nodes.Node; import be.nikiroo.fanfix.Instance; -import be.nikiroo.fanfix.bundles.Config; import be.nikiroo.fanfix.bundles.StringId; import be.nikiroo.fanfix.data.Chapter; import be.nikiroo.fanfix.data.MetaData; -import be.nikiroo.fanfix.data.Paragraph; -import be.nikiroo.fanfix.data.Paragraph.ParagraphType; import be.nikiroo.fanfix.data.Story; -import be.nikiroo.utils.Image; import be.nikiroo.utils.Progress; import be.nikiroo.utils.StringUtils; @@ -38,136 +33,11 @@ import be.nikiroo.utils.StringUtils; * @author niki */ public abstract class BasicSupport { - /** - * The supported input types for which we can get a {@link BasicSupport} - * object. - * - * @author niki - */ - public enum SupportType { - /** EPUB files created with this program */ - EPUB, - /** Pure text file with some rules */ - TEXT, - /** TEXT but with associated .info file */ - INFO_TEXT, - /** My Little Pony fanfictions */ - FIMFICTION, - /** Fanfictions from a lot of different universes */ - FANFICTION, - /** Website with lots of Mangas */ - MANGAFOX, - /** Furry website with comics support */ - E621, - /** Furry website with stories */ - YIFFSTAR, - /** Comics and images groups, mostly but not only NSFW */ - E_HENTAI, - /** CBZ files */ - CBZ, - /** HTML files */ - HTML; - - /** - * A description of this support type (more information than the - * {@link BasicSupport#getSourceName()}). - * - * @return the description - */ - public String getDesc() { - String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC, - this.name()); - - if (desc == null) { - desc = Instance.getTrans().getString(StringId.INPUT_DESC, this); - } - - return desc; - } - - /** - * The name of this support type (a short version). 
- * - * @return the name - */ - public String getSourceName() { - BasicSupport support = BasicSupport.getSupport(this); - if (support != null) { - return support.getSourceName(); - } - - return null; - } - - @Override - public String toString() { - return super.toString().toLowerCase(); - } - - /** - * Call {@link SupportType#valueOf(String)} after conversion to upper - * case. - * - * @param typeName - * the possible type name - * - * @return NULL or the type - */ - public static SupportType valueOfUC(String typeName) { - return SupportType.valueOf(typeName == null ? null : typeName - .toUpperCase()); - } - - /** - * Call {@link SupportType#valueOf(String)} after conversion to upper - * case but return NULL for NULL instead of raising exception. - * - * @param typeName - * the possible type name - * - * @return NULL or the type - */ - public static SupportType valueOfNullOkUC(String typeName) { - if (typeName == null) { - return null; - } - - return SupportType.valueOfUC(typeName); - } - - /** - * Call {@link SupportType#valueOf(String)} after conversion to upper - * case but return NULL in case of error instead of raising an - * exception. - * - * @param typeName - * the possible type name - * - * @return NULL or the type - */ - public static SupportType valueOfAllOkUC(String typeName) { - try { - return SupportType.valueOfUC(typeName); - } catch (Exception e) { - return null; - } - } - } - - private InputStream in; + private Document sourceNode; + private URL source; private SupportType type; private URL currentReferer; // with only one 'r', as in 'HTTP'... 
- // quote chars - private char openQuote = Instance.getTrans().getCharacter( - StringId.OPEN_SINGLE_QUOTE); - private char closeQuote = Instance.getTrans().getCharacter( - StringId.CLOSE_SINGLE_QUOTE); - private char openDoubleQuote = Instance.getTrans().getCharacter( - StringId.OPEN_DOUBLE_QUOTE); - private char closeDoubleQuote = Instance.getTrans().getCharacter( - StringId.CLOSE_DOUBLE_QUOTE); - /** * The name of this support class. * @@ -196,61 +66,45 @@ public abstract class BasicSupport { /** * Return the {@link MetaData} of this story. * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * * @return the associated {@link MetaData}, never NULL * * @throws IOException * in case of I/O error */ - protected abstract MetaData getMeta(URL source, InputStream in) - throws IOException; + protected abstract MetaData getMeta() throws IOException; /** * Return the story description. * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * * @return the description * * @throws IOException * in case of I/O error */ - protected abstract String getDesc(URL source, InputStream in) - throws IOException; + protected abstract String getDesc() throws IOException; /** - * Return the list of chapters (name and resource). + * Return the list of chapters (name and resource). * + *

+ * Can be NULL if this {@link BasicSupport} do no use chapters. * - * @param source - * the source of the story - * @param in - * the input (the main resource) * @param pg * the optional progress reporter * - * @return the chapters + * @return the chapters or NULL * * @throws IOException * in case of I/O error */ - protected abstract List> getChapters(URL source, - InputStream in, Progress pg) throws IOException; + protected abstract List> getChapters(Progress pg) + throws IOException; /** * Return the content of the chapter (possibly HTML encoded, if * {@link BasicSupport#isHtml()} is TRUE). * - * @param source - * the source of the story - * @param in - * the input (the main resource) + * @param chapUrl + * the chapter {@link URL} * @param number * the chapter number * @param pg @@ -261,18 +115,8 @@ public abstract class BasicSupport { * @throws IOException * in case of I/O error */ - protected abstract String getChapterContent(URL source, InputStream in, - int number, Progress pg) throws IOException; - - /** - * Log into the support (can be a no-op depending upon the support). - * - * @throws IOException - * in case of I/O error - */ - @SuppressWarnings("unused") - public void login() throws IOException { - } + protected abstract String getChapterContent(URL chapUrl, int number, + Progress pg) throws IOException; /** * Return the list of cookies (values included) that must be used to @@ -300,42 +144,146 @@ public abstract class BasicSupport { * Return the canonical form of the main {@link URL}. * * @param source + * the source {@link URL}, which can be NULL + * + * @return the canonical form of this {@link URL} or NULL if the source was + * NULL + */ + protected URL getCanonicalUrl(URL source) { + return source; + } + + /** + * The main {@link Node} for this {@link Story}. + * + * @return the node + */ + protected Element getSourceNode() { + return sourceNode; + } + + /** + * The main {@link URL} for this {@link Story}. 
+ * + * @return the URL + */ + protected URL getSource() { + return source; + } + + /** + * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e., + * the current {@link URL} we work on. + * + * @return the referer + */ + public URL getCurrentReferer() { + return currentReferer; + } + + /** + * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e., + * the current {@link URL} we work on. + * + * @param currentReferer + * the new referer + */ + protected void setCurrentReferer(URL currentReferer) { + this.currentReferer = currentReferer; + } + + /** + * The support type. + * + * @return the type + */ + public SupportType getType() { + return type; + } + + /** + * The support type. + * + * @param type + * the new type + */ + protected void setType(SupportType type) { + this.type = type; + } + + /** + * Open an input link that will be used for the support. + *

+ * Can return NULL, in which case you are supposed to work without an + * {@link InputStream}. + * + * @param source * the source {@link URL} * - * @return the canonical form of this {@link URL} + * @return the {@link InputStream} + * + * @throws IOException + * in case of I/O error + */ + protected Document loadDocument(URL source) throws IOException { + String url = getCanonicalUrl(source).toString(); + return DataUtil.load(Instance.getCache().open(source, this, false), + "UTF-8", url.toString()); + } + + /** + * Log into the support (can be a no-op depending upon the support). * * @throws IOException * in case of I/O error */ @SuppressWarnings("unused") - public URL getCanonicalUrl(URL source) throws IOException { - return source; + protected void login() throws IOException { + } + + /** + * Prepare the support if needed before processing. + * + * @throws IOException + * on I/O error + */ + @SuppressWarnings("unused") + protected void preprocess() throws IOException { + } + + /** + * Now that we have processed the {@link Story}, close the resources if any. + */ + protected void close() { + setCurrentReferer(null); } /** * Process the given story resource into a partially filled {@link Story} * object containing the name and metadata, except for the description. * - * @param url - * the story resource - * * @return the {@link Story} * * @throws IOException * in case of I/O error */ - public Story processMeta(URL url) throws IOException { - return processMeta(url, true, false, null); + public Story processMeta() throws IOException { + Story story = null; + + preprocess(); + try { + story = processMeta(false, null); + } finally { + close(); + } + + return story; } /** * Process the given story resource into a partially filled {@link Story} * object containing the name and metadata. 
* - * @param url - * the story resource - * @param close - * close "this" and "in" when done * @param getDesc * retrieve the description of the story, or not * @param pg @@ -346,75 +294,48 @@ public abstract class BasicSupport { * @throws IOException * in case of I/O error */ - protected Story processMeta(URL url, boolean close, boolean getDesc, - Progress pg) throws IOException { + protected Story processMeta(boolean getDesc, Progress pg) + throws IOException { if (pg == null) { pg = new Progress(); } else { pg.setMinMax(0, 100); } - login(); - pg.setProgress(10); - - url = getCanonicalUrl(url); - - setCurrentReferer(url); - - in = openInput(url); // NULL allowed here - try { - preprocess(url, getInput()); - pg.setProgress(30); - - Story story = new Story(); - MetaData meta = getMeta(url, getInput()); - if (meta.getCreationDate() == null - || meta.getCreationDate().isEmpty()) { - meta.setCreationDate(StringUtils.fromTime(new Date().getTime())); - } - story.setMeta(meta); - - pg.setProgress(50); + pg.setProgress(30); - if (meta.getCover() == null) { - meta.setCover(getDefaultCover(meta.getSubject())); - } - - pg.setProgress(60); + Story story = new Story(); + MetaData meta = getMeta(); + if (meta.getCreationDate() == null || meta.getCreationDate().isEmpty()) { + meta.setCreationDate(StringUtils.fromTime(new Date().getTime())); + } + story.setMeta(meta); - if (getDesc) { - String descChapterName = Instance.getTrans().getString( - StringId.DESCRIPTION); - story.getMeta().setResume( - makeChapter(url, 0, descChapterName, - getDesc(url, getInput()), null)); - } + pg.setProgress(50); - pg.setProgress(100); - return story; - } finally { - if (close) { - try { - close(); - } catch (IOException e) { - Instance.getTraceHandler().error(e); - } + if (meta.getCover() == null) { + meta.setCover(BasicSupportHelper.getDefaultCover(meta.getSubject())); + } - if (in != null) { - in.close(); - } - } + pg.setProgress(60); - setCurrentReferer(null); + if (getDesc) { + String 
descChapterName = Instance.getTrans().getString( + StringId.DESCRIPTION); + story.getMeta().setResume( + BasicSupportPara.makeChapter(this, source, 0, + descChapterName, // + getDesc(), isHtml(), null)); } + + pg.setProgress(100); + return story; } /** * Process the given story resource into a fully filled {@link Story} * object. * - * @param url - * the story resource * @param pg * the optional progress reporter * @@ -423,32 +344,33 @@ public abstract class BasicSupport { * @throws IOException * in case of I/O error */ - public Story process(URL url, Progress pg) throws IOException { + public Story process(Progress pg) throws IOException { if (pg == null) { pg = new Progress(); } else { pg.setMinMax(0, 100); } - url = getCanonicalUrl(url); + setCurrentReferer(source); + login(); + sourceNode = loadDocument(source); + pg.setProgress(1); try { Progress pgMeta = new Progress(); pg.addProgress(pgMeta, 10); - Story story = processMeta(url, false, true, pgMeta); + preprocess(); + Story story = processMeta(true, pgMeta); if (!pgMeta.isDone()) { pgMeta.setProgress(pgMeta.getMax()); // 10% } pg.setName("Retrieving " + story.getMeta().getTitle()); - setCurrentReferer(url); - Progress pgGetChapters = new Progress(); pg.addProgress(pgGetChapters, 10); story.setChapters(new ArrayList()); - List> chapters = getChapters(url, getInput(), - pgGetChapters); + List> chapters = getChapters(pgGetChapters); if (!pgGetChapters.isDone()) { pgGetChapters.setProgress(pgGetChapters.getMax()); // 20% } @@ -462,41 +384,35 @@ public abstract class BasicSupport { int i = 1; for (Entry chap : chapters) { pgChaps.setName("Extracting chapter " + i); - InputStream chapIn = null; - if (chap.getValue() != null) { - setCurrentReferer(chap.getValue()); - chapIn = Instance.getCache().open(chap.getValue(), - this, false); + URL chapUrl = chap.getValue(); + String chapName = chap.getKey(); + if (chapUrl != null) { + setCurrentReferer(chapUrl); } - pgChaps.setProgress(i * 100); - try { - Progress 
pgGetChapterContent = new Progress(); - Progress pgMakeChapter = new Progress(); - pgChaps.addProgress(pgGetChapterContent, 100); - pgChaps.addProgress(pgMakeChapter, 100); - - String content = getChapterContent(url, chapIn, i, - pgGetChapterContent); - if (!pgGetChapterContent.isDone()) { - pgGetChapterContent.setProgress(pgGetChapterContent - .getMax()); - } - Chapter cc = makeChapter(url, i, chap.getKey(), - content, pgMakeChapter); - if (!pgMakeChapter.isDone()) { - pgMakeChapter.setProgress(pgMakeChapter.getMax()); - } + pgChaps.setProgress(i * 100); + Progress pgGetChapterContent = new Progress(); + Progress pgMakeChapter = new Progress(); + pgChaps.addProgress(pgGetChapterContent, 100); + pgChaps.addProgress(pgMakeChapter, 100); + + String content = getChapterContent(chapUrl, i, + pgGetChapterContent); + if (!pgGetChapterContent.isDone()) { + pgGetChapterContent.setProgress(pgGetChapterContent + .getMax()); + } - words += cc.getWords(); - story.getChapters().add(cc); - story.getMeta().setWords(words); - } finally { - if (chapIn != null) { - chapIn.close(); - } + Chapter cc = BasicSupportPara.makeChapter(this, chapUrl, i, + chapName, content, isHtml(), pgMakeChapter); + if (!pgMakeChapter.isDone()) { + pgMakeChapter.setProgress(pgMakeChapter.getMax()); } + words += cc.getWords(); + story.getChapters().add(cc); + story.getMeta().setWords(words); + i++; } @@ -506,1148 +422,106 @@ public abstract class BasicSupport { } return story; - } finally { - try { - close(); - } catch (IOException e) { - Instance.getTraceHandler().error(e); - } - - if (in != null) { - in.close(); - } - - setCurrentReferer(null); + close(); } } /** - * The support type. + * Return a {@link BasicSupport} implementation supporting the given + * resource if possible. * - * @return the type - */ - public SupportType getType() { - return type; - } - - /** - * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e., - * the current {@link URL} we work on. 
+ * @param url + * the story resource * - * @return the referer + * @return an implementation that supports it, or NULL */ - public URL getCurrentReferer() { - return currentReferer; - } + public static BasicSupport getSupport(URL url) { + if (url == null) { + return null; + } - /** - * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e., - * the current {@link URL} we work on. - * - * @param currentReferer - * the new referer - */ - protected void setCurrentReferer(URL currentReferer) { - this.currentReferer = currentReferer; - } + // TEXT and INFO_TEXT always support files (not URLs though) + for (SupportType type : SupportType.values()) { + if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) { + BasicSupport support = getSupport(type, url); + if (support != null && support.supports(url)) { + return support; + } + } + } - /** - * The support type. - * - * @param type - * the new type - * - * @return this - */ - protected BasicSupport setType(SupportType type) { - this.type = type; - return this; + for (SupportType type : new SupportType[] { SupportType.INFO_TEXT, + SupportType.TEXT }) { + BasicSupport support = getSupport(type, url); + if (support != null && support.supports(url)) { + return support; + } + } + + return null; } /** - * Prepare the support if needed before processing. + * Return a {@link BasicSupport} implementation supporting the given type. 
* - * @param source - * the source of the story - * @param in - * the input (the main resource) + * @param type + * the type + * @param url + * the {@link URL} to support (can be NULL to get an + * "abstract support") * - * @throws IOException - * on I/O error + * @return an implementation that supports it, or NULL */ - @SuppressWarnings("unused") - protected void preprocess(URL source, InputStream in) throws IOException { - } + public static BasicSupport getSupport(SupportType type, URL url) { + BasicSupport support = null; - /** - * Now that we have processed the {@link Story}, close the resources if any. - * - * @throws IOException - * on I/O error - */ - @SuppressWarnings("unused") - protected void close() throws IOException { - } - - /** - * Create a {@link Chapter} object from the given information, formatting - * the content as it should be. - * - * @param source - * the source of the story - * @param number - * the chapter number - * @param name - * the chapter name - * @param content - * the chapter content - * @param pg - * the optional progress reporter - * - * @return the {@link Chapter} - * - * @throws IOException - * in case of I/O error - */ - protected Chapter makeChapter(URL source, int number, String name, - String content, Progress pg) throws IOException { - // Chapter name: process it correctly, then remove the possible - // redundant "Chapter x: " in front of it, or "-" (as in - // "Chapter 5: - Fun!" 
after the ": " was automatically added) - String chapterName = processPara(name).getContent().trim(); - for (String lang : Instance.getConfig().getString(Config.CHAPTER) - .split(",")) { - String chapterWord = Instance.getConfig().getStringX( - Config.CHAPTER, lang); - if (chapterName.startsWith(chapterWord)) { - chapterName = chapterName.substring(chapterWord.length()) - .trim(); - break; - } - } - - if (chapterName.startsWith(Integer.toString(number))) { - chapterName = chapterName.substring( - Integer.toString(number).length()).trim(); - } - - while (chapterName.startsWith(":") || chapterName.startsWith("-")) { - chapterName = chapterName.substring(1).trim(); - } - // - - Chapter chap = new Chapter(number, chapterName); - - if (content != null) { - List paras = makeParagraphs(source, content, pg); - long words = 0; - for (Paragraph para : paras) { - words += para.getWords(); - } - chap.setParagraphs(paras); - chap.setWords(words); - } - - return chap; - - } - - /** - * Convert the given content into {@link Paragraph}s. - * - * @param source - * the source URL of the story - * @param content - * the textual content - * @param pg - * the optional progress reporter - * - * @return the {@link Paragraph}s - * - * @throws IOException - * in case of I/O error - */ - protected List makeParagraphs(URL source, String content, - Progress pg) throws IOException { - if (pg == null) { - pg = new Progress(); - } - - if (isHtml()) { - // Special


processing: - content = content.replaceAll("(
]*>)|(
)|(
)", - "
* * *
"); - } - - List paras = new ArrayList(); - - if (content != null && !content.trim().isEmpty()) { - if (isHtml()) { - String[] tab = content.split("(

|

|
|
)"); - pg.setMinMax(0, tab.length); - int i = 1; - for (String line : tab) { - if (line.startsWith("[") && line.endsWith("]")) { - pg.setName("Extracting image " + i); - } - paras.add(makeParagraph(source, line.trim())); - pg.setProgress(i++); - } - pg.setName(null); - } else { - List lines = new ArrayList(); - BufferedReader buff = null; - try { - buff = new BufferedReader( - new InputStreamReader(new ByteArrayInputStream( - content.getBytes("UTF-8")), "UTF-8")); - for (String line = buff.readLine(); line != null; line = buff - .readLine()) { - lines.add(line.trim()); - } - } finally { - if (buff != null) { - buff.close(); - } - } - - pg.setMinMax(0, lines.size()); - int i = 0; - for (String line : lines) { - if (line.startsWith("[") && line.endsWith("]")) { - pg.setName("Extracting image " + i); - } - paras.add(makeParagraph(source, line)); - pg.setProgress(i++); - } - pg.setName(null); - } - - // Check quotes for "bad" format - List newParas = new ArrayList(); - for (Paragraph para : paras) { - newParas.addAll(requotify(para)); - } - paras = newParas; - - // Remove double blanks/brks - fixBlanksBreaks(paras); - } - - return paras; - } - - /** - * Convert the given line into a single {@link Paragraph}. - * - * @param source - * the source URL of the story - * @param line - * the textual content of the paragraph - * - * @return the {@link Paragraph} - */ - private Paragraph makeParagraph(URL source, String line) { - Image image = null; - if (line.startsWith("[") && line.endsWith("]")) { - image = getImage(this, source, line.substring(1, line.length() - 1) - .trim()); - } - - if (image != null) { - return new Paragraph(image); - } - - return processPara(line); - } - - /** - * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of - * those {@link Paragraph}s. - *

- * The resulting list will not contain a starting or trailing blank/break - * nor 2 blanks or breaks following each other. - * - * @param paras - * the list of {@link Paragraph}s to fix - */ - protected void fixBlanksBreaks(List paras) { - boolean space = false; - boolean brk = true; - for (int i = 0; i < paras.size(); i++) { - Paragraph para = paras.get(i); - boolean thisSpace = para.getType() == ParagraphType.BLANK; - boolean thisBrk = para.getType() == ParagraphType.BREAK; - - if (i > 0 && space && thisBrk) { - paras.remove(i - 1); - i--; - } else if ((space || brk) && (thisSpace || thisBrk)) { - paras.remove(i); - i--; - } - - space = thisSpace; - brk = thisBrk; - } - - // Remove blank/brk at start - if (paras.size() > 0 - && (paras.get(0).getType() == ParagraphType.BLANK || paras.get( - 0).getType() == ParagraphType.BREAK)) { - paras.remove(0); - } - - // Remove blank/brk at end - int last = paras.size() - 1; - if (paras.size() > 0 - && (paras.get(last).getType() == ParagraphType.BLANK || paras - .get(last).getType() == ParagraphType.BREAK)) { - paras.remove(last); - } - } - - /** - * Get the default cover related to this subject (see .info files). - * - * @param subject - * the subject - * - * @return the cover if any, or NULL - */ - static Image getDefaultCover(String subject) { - if (subject != null && !subject.isEmpty() - && Instance.getCoverDir() != null) { - try { - File fileCover = new File(Instance.getCoverDir(), subject); - return getImage(null, fileCover.toURI().toURL(), subject); - } catch (MalformedURLException e) { - } - } - - return null; - } - - /** - * Return the list of supported image extensions. 
- * - * @param emptyAllowed - * TRUE to allow an empty extension on first place, which can be - * used when you may already have an extension in your input but - * are not sure about it - * - * @return the extensions - */ - static String[] getImageExt(boolean emptyAllowed) { - if (emptyAllowed) { - return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" }; - } - - return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" }; - } - - /** - * Check if the given resource can be a local image or a remote image, then - * refresh the cache with it if it is. - * - * @param source - * the story source - * @param line - * the resource to check - * - * @return the image if found, or NULL - * - */ - static Image getImage(BasicSupport support, URL source, String line) { - URL url = getImageUrl(support, source, line); - if (url != null) { - if ("file".equals(url.getProtocol())) { - if (new File(url.getPath()).isDirectory()) { - return null; - } - } - InputStream in = null; - try { - in = Instance.getCache().open(url, getSupport(url), true); - return new Image(in); - } catch (IOException e) { - } finally { - if (in != null) { - try { - in.close(); - } catch (IOException e) { - } - } - } - } - - return null; - } - - /** - * Check if the given resource can be a local image or a remote image, then - * refresh the cache with it if it is. 
- * - * @param source - * the story source - * @param line - * the resource to check - * - * @return the image URL if found, or NULL - * - */ - static URL getImageUrl(BasicSupport support, URL source, String line) { - URL url = null; - - if (line != null) { - // try for files - if (source != null) { - try { - - String relPath = null; - String absPath = null; - try { - String path = new File(source.getFile()).getParent(); - relPath = new File(new File(path), line.trim()) - .getAbsolutePath(); - } catch (Exception e) { - // Cannot be converted to path (one possibility to take - // into account: absolute path on Windows) - } - try { - absPath = new File(line.trim()).getAbsolutePath(); - } catch (Exception e) { - // Cannot be converted to path (at all) - } - - for (String ext : getImageExt(true)) { - File absFile = new File(absPath + ext); - File relFile = new File(relPath + ext); - if (absPath != null && absFile.exists() - && absFile.isFile()) { - url = absFile.toURI().toURL(); - } else if (relPath != null && relFile.exists() - && relFile.isFile()) { - url = relFile.toURI().toURL(); - } - } - } catch (Exception e) { - // Should not happen since we control the correct arguments - } - } - - if (url == null) { - // try for URLs - try { - for (String ext : getImageExt(true)) { - if (Instance.getCache() - .check(new URL(line + ext), true)) { - url = new URL(line + ext); - break; - } - } - - // try out of cache - if (url == null) { - for (String ext : getImageExt(true)) { - try { - url = new URL(line + ext); - Instance.getCache().refresh(url, support, true); - break; - } catch (IOException e) { - // no image with this ext - url = null; - } - } - } - } catch (MalformedURLException e) { - // Not an url - } - } - - // refresh the cached file - if (url != null) { - try { - Instance.getCache().refresh(url, support, true); - } catch (IOException e) { - // woops, broken image - url = null; - } - } - } - - return url; - } - - /** - * Open the input file that will be used through 
the support. - *

- * Can return NULL, in which case you are supposed to work without an - * {@link InputStream}. - * - * @param source - * the source {@link URL} - * - * @return the {@link InputStream} - * - * @throws IOException - * in case of I/O error - */ - protected InputStream openInput(URL source) throws IOException { - return Instance.getCache().open(source, this, false); - } - - /** - * Reset then return {@link BasicSupport#in}. - * - * @return {@link BasicSupport#in} - */ - protected InputStream getInput() { - return reset(in); - } - - /** - * Fix the author name if it is prefixed with some "by" {@link String}. - * - * @param author - * the author with a possible prefix - * - * @return the author without prefixes - */ - protected String fixAuthor(String author) { - if (author != null) { - for (String suffix : new String[] { " ", ":" }) { - for (String byString : Instance.getConfig() - .getString(Config.BYS).split(",")) { - byString += suffix; - if (author.toUpperCase().startsWith(byString.toUpperCase())) { - author = author.substring(byString.length()).trim(); - } - } - } - - // Special case (without suffix): - if (author.startsWith("©")) { - author = author.substring(1); - } - } - - return author; - } - - /** - * Check quotes for bad format (i.e., quotes with normal paragraphs inside) - * and requotify them (i.e., separate them into QUOTE paragraphs and other - * paragraphs (quotes or not)). 
- * - * @param para - * the paragraph to requotify (not necessarily a quote) - * - * @return the correctly (or so we hope) quotified paragraphs - */ - protected List requotify(Paragraph para) { - List newParas = new ArrayList(); - - if (para.getType() == ParagraphType.QUOTE - && para.getContent().length() > 2) { - String line = para.getContent(); - boolean singleQ = line.startsWith("" + openQuote); - boolean doubleQ = line.startsWith("" + openDoubleQuote); - - // Do not try when more than one quote at a time - // (some stories are not easily readable if we do) - if (singleQ - && line.indexOf(closeQuote, 1) < line - .lastIndexOf(closeQuote)) { - newParas.add(para); - return newParas; - } - if (doubleQ - && line.indexOf(closeDoubleQuote, 1) < line - .lastIndexOf(closeDoubleQuote)) { - newParas.add(para); - return newParas; - } - // - - if (!singleQ && !doubleQ) { - line = openDoubleQuote + line + closeDoubleQuote; - newParas.add(new Paragraph(ParagraphType.QUOTE, line, para - .getWords())); - } else { - char open = singleQ ? openQuote : openDoubleQuote; - char close = singleQ ? closeQuote : closeDoubleQuote; - - int posDot = -1; - boolean inQuote = false; - int i = 0; - for (char car : line.toCharArray()) { - if (car == open) { - inQuote = true; - } else if (car == close) { - inQuote = false; - } else if (car == '.' && !inQuote) { - posDot = i; - break; - } - i++; - } - - if (posDot >= 0) { - String rest = line.substring(posDot + 1).trim(); - line = line.substring(0, posDot + 1).trim(); - long words = 1; - for (char car : line.toCharArray()) { - if (car == ' ') { - words++; - } - } - newParas.add(new Paragraph(ParagraphType.QUOTE, line, words)); - if (!rest.isEmpty()) { - newParas.addAll(requotify(processPara(rest))); - } - } else { - newParas.add(para); - } - } - } else { - newParas.add(para); - } - - return newParas; - } - - /** - * Process a {@link Paragraph} from a raw line of text. - *

- * Will also fix quotes and HTML encoding if needed. - * - * @param line - * the raw line - * - * @return the processed {@link Paragraph} - */ - protected Paragraph processPara(String line) { - line = ifUnhtml(line).trim(); - - boolean space = true; - boolean brk = true; - boolean quote = false; - boolean tentativeCloseQuote = false; - char prev = '\0'; - int dashCount = 0; - long words = 1; - - StringBuilder builder = new StringBuilder(); - for (char car : line.toCharArray()) { - if (car != '-') { - if (dashCount > 0) { - // dash, ndash and mdash: - – — - // currently: always use mdash - builder.append(dashCount == 1 ? '-' : '—'); - } - dashCount = 0; - } - - if (tentativeCloseQuote) { - tentativeCloseQuote = false; - if (Character.isLetterOrDigit(car)) { - builder.append("'"); - } else { - // handle double-single quotes as double quotes - if (prev == car) { - builder.append(closeDoubleQuote); - continue; - } - - builder.append(closeQuote); - } - } - - switch (car) { - case ' ': // note: unbreakable space - case ' ': - case '\t': - case '\n': // just in case - case '\r': // just in case - if (builder.length() > 0 - && builder.charAt(builder.length() - 1) != ' ') { - words++; - } - builder.append(' '); - break; - - case '\'': - if (space || (brk && quote)) { - quote = true; - // handle double-single quotes as double quotes - if (prev == car) { - builder.deleteCharAt(builder.length() - 1); - builder.append(openDoubleQuote); - } else { - builder.append(openQuote); - } - } else if (prev == ' ' || prev == car) { - // handle double-single quotes as double quotes - if (prev == car) { - builder.deleteCharAt(builder.length() - 1); - builder.append(openDoubleQuote); - } else { - builder.append(openQuote); - } - } else { - // it is a quote ("I'm off") or a 'quote' ("This - // 'good' restaurant"...) 
- tentativeCloseQuote = true; - } - break; - - case '"': - if (space || (brk && quote)) { - quote = true; - builder.append(openDoubleQuote); - } else if (prev == ' ') { - builder.append(openDoubleQuote); - } else { - builder.append(closeDoubleQuote); - } - break; - - case '-': - if (space) { - quote = true; - } else { - dashCount++; - } - space = false; - break; - - case '*': - case '~': - case '/': - case '\\': - case '<': - case '>': - case '=': - case '+': - case '_': - case '–': - case '—': - space = false; - builder.append(car); - break; - - case '‘': - case '`': - case '‹': - case '﹁': - case '〈': - case '「': - if (space || (brk && quote)) { - quote = true; - builder.append(openQuote); - } else { - // handle double-single quotes as double quotes - if (prev == car) { - builder.deleteCharAt(builder.length() - 1); - builder.append(openDoubleQuote); - } else { - builder.append(openQuote); - } - } - space = false; - brk = false; - break; - - case '’': - case '›': - case '﹂': - case '〉': - case '」': - space = false; - brk = false; - // handle double-single quotes as double quotes - if (prev == car) { - builder.deleteCharAt(builder.length() - 1); - builder.append(closeDoubleQuote); - } else { - builder.append(closeQuote); - } - break; - - case '«': - case '“': - case '﹃': - case '《': - case '『': - if (space || (brk && quote)) { - quote = true; - builder.append(openDoubleQuote); - } else { - builder.append(openDoubleQuote); - } - space = false; - brk = false; - break; - - case '»': - case '”': - case '﹄': - case '》': - case '』': - space = false; - brk = false; - builder.append(closeDoubleQuote); - break; - - default: - space = false; - brk = false; - builder.append(car); - break; - } - - prev = car; - } - - if (tentativeCloseQuote) { - tentativeCloseQuote = false; - builder.append(closeQuote); - } - - line = builder.toString().trim(); - - ParagraphType type = ParagraphType.NORMAL; - if (space) { - type = ParagraphType.BLANK; - } else if (brk) { - type = 
ParagraphType.BREAK; - } else if (quote) { - type = ParagraphType.QUOTE; - } - - return new Paragraph(type, line, words); - } - - /** - * Remove the HTML from the input if {@link BasicSupport#isHtml()} is - * true. - * - * @param input - * the input - * - * @return the no html version if needed - */ - private String ifUnhtml(String input) { - if (isHtml() && input != null) { - return StringUtils.unhtml(input); - } - - return input; - } - - /** - * Return a {@link BasicSupport} implementation supporting the given - * resource if possible. - * - * @param url - * the story resource - * - * @return an implementation that supports it, or NULL - */ - public static BasicSupport getSupport(URL url) { - if (url == null) { - return null; - } - - // TEXT and INFO_TEXT always support files (not URLs though) - for (SupportType type : SupportType.values()) { - if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) { - BasicSupport support = getSupport(type); - if (support != null && support.supports(url)) { - return support; - } - } - } - - for (SupportType type : new SupportType[] { SupportType.INFO_TEXT, - SupportType.TEXT }) { - BasicSupport support = getSupport(type); - if (support != null && support.supports(url)) { - return support; - } - } - - return null; - } - - /** - * Return a {@link BasicSupport} implementation supporting the given type. 
- * - * @param type - * the type - * - * @return an implementation that supports it, or NULL - */ - public static BasicSupport getSupport(SupportType type) { switch (type) { case EPUB: - return new Epub().setType(type); + support = new Epub(); + break; case INFO_TEXT: - return new InfoText().setType(type); + support = new InfoText(); + break; case FIMFICTION: try { // Can fail if no client key or NO in options - return new FimfictionApi().setType(type); + support = new FimfictionApi(); } catch (IOException e) { - return new Fimfiction().setType(type); + support = new Fimfiction(); } + break; case FANFICTION: - return new Fanfiction().setType(type); + support = new Fanfiction(); + break; case TEXT: - return new Text().setType(type); + support = new Text(); + break; case MANGAFOX: - return new MangaFox().setType(type); + support = new MangaFox(); + break; case E621: - return new E621().setType(type); + support = new E621(); + break; case YIFFSTAR: - return new YiffStar().setType(type); + support = new YiffStar(); + break; case E_HENTAI: - return new EHentai().setType(type); + support = new EHentai(); + break; case CBZ: - return new Cbz().setType(type); + support = new Cbz(); + break; case HTML: - return new Html().setType(type); - } - - return null; - } - - /** - * Reset the given {@link InputStream} and return it. - * - * @param in - * the {@link InputStream} to reset - * - * @return the same {@link InputStream} after reset - */ - static protected InputStream reset(InputStream in) { - try { - if (in != null) { - in.reset(); - } - } catch (IOException e) { - } - - return in; - } - - /** - * Return the first line from the given input which correspond to the given - * selectors. 
- * - * @param in - * the input - * @param needle - * a string that must be found inside the target line (also - * supports "^" at start to say "only if it starts with" the - * needle) - * @param relativeLine - * the line to return based upon the target line position (-1 = - * the line before, 0 = the target line...) - * - * @return the line - */ - static protected String getLine(InputStream in, String needle, - int relativeLine) { - return getLine(in, needle, relativeLine, true); - } - - /** - * Return a line from the given input which correspond to the given - * selectors. - * - * @param in - * the input - * @param needle - * a string that must be found inside the target line (also - * supports "^" at start to say "only if it starts with" the - * needle) - * @param relativeLine - * the line to return based upon the target line position (-1 = - * the line before, 0 = the target line...) - * @param first - * takes the first result (as opposed to the last one, which will - * also always spend the input) - * - * @return the line - */ - static protected String getLine(InputStream in, String needle, - int relativeLine, boolean first) { - String rep = null; - - reset(in); - - List lines = new ArrayList(); - @SuppressWarnings("resource") - Scanner scan = new Scanner(in, "UTF-8"); - int index = -1; - scan.useDelimiter("\\n"); - while (scan.hasNext()) { - lines.add(scan.next()); - - if (index == -1) { - if (needle.startsWith("^")) { - if (lines.get(lines.size() - 1).startsWith( - needle.substring(1))) { - index = lines.size() - 1; - } - - } else { - if (lines.get(lines.size() - 1).contains(needle)) { - index = lines.size() - 1; - } - } - } - - if (index >= 0 && index + relativeLine < lines.size()) { - rep = lines.get(index + relativeLine); - if (first) { - break; - } - } + support = new Html(); + break; } - return rep; - } - - /** - * Return the text between the key and the endKey (and optional subKey can - * be passed, in this case we will look for the key first, then 
take the - * text between the subKey and the endKey). - *

- * Will only match the first line with the given key if more than one are - * possible. Which also means that if the subKey or endKey is not found on - * that line, NULL will be returned. - * - * @param in - * the input - * @param key - * the key to match (also supports "^" at start to say - * "only if it starts with" the key) - * @param subKey - * the sub key or NULL if none - * @param endKey - * the end key or NULL for "up to the end" - * @return the text or NULL if not found - */ - static protected String getKeyLine(InputStream in, String key, - String subKey, String endKey) { - return getKeyText(getLine(in, key, 0), key, subKey, endKey); - } - - /** - * Return the text between the key and the endKey (and optional subKey can - * be passed, in this case we will look for the key first, then take the - * text between the subKey and the endKey). - * - * @param in - * the input - * @param key - * the key to match (also supports "^" at start to say - * "only if it starts with" the key) - * @param subKey - * the sub key or NULL if none - * @param endKey - * the end key or NULL for "up to the end" - * @return the text or NULL if not found - */ - static protected String getKeyText(String in, String key, String subKey, - String endKey) { - String result = null; - - String line = in; - if (line != null && line.contains(key)) { - line = line.substring(line.indexOf(key) + key.length()); - if (subKey == null || subKey.isEmpty() || line.contains(subKey)) { - if (subKey != null) { - line = line.substring(line.indexOf(subKey) - + subKey.length()); - } - if (endKey == null || line.contains(endKey)) { - if (endKey != null) { - line = line.substring(0, line.indexOf(endKey)); - result = line; - } - } - } - } - - return result; - } - - /** - * Return the text between the key and the endKey (optional subKeys can be - * passed, in this case we will look for the subKeys first, then take the - * text between the key and the endKey). 
- * - * @param in - * the input - * @param key - * the key to match - * @param endKey - * the end key or NULL for "up to the end" - * @param afters - * the sub-keys to find before checking for key/endKey - * - * @return the text or NULL if not found - */ - static protected String getKeyTextAfter(String in, String key, - String endKey, String... afters) { - - if (in != null && !in.isEmpty()) { - int pos = indexOfAfter(in, 0, afters); - if (pos < 0) { - return null; - } - - in = in.substring(pos); - } - - return getKeyText(in, key, null, endKey); - } - - /** - * Return the first index after all the given "afters" have been found in - * the {@link String}, or -1 if it was not possible. - * - * @param in - * the input - * @param startAt - * start at this position in the string - * @param afters - * the sub-keys to find before checking for key/endKey - * - * @return the text or NULL if not found - */ - static protected int indexOfAfter(String in, int startAt, String... afters) { - int pos = -1; - if (in != null && !in.isEmpty()) { - pos = startAt; - if (afters != null) { - for (int i = 0; pos >= 0 && i < afters.length; i++) { - String subKey = afters[i]; - if (!subKey.isEmpty()) { - pos = in.indexOf(subKey, pos); - if (pos >= 0) { - pos += subKey.length(); - } - } - } - } + if (support != null) { + support.setType(type); + support.source = support.getCanonicalUrl(url); } - return pos; + return support; } } diff --git a/src/be/nikiroo/fanfix/supported/BasicSupportHelper.java b/src/be/nikiroo/fanfix/supported/BasicSupportHelper.java new file mode 100644 index 00000000..0c9e199f --- /dev/null +++ b/src/be/nikiroo/fanfix/supported/BasicSupportHelper.java @@ -0,0 +1,225 @@ +package be.nikiroo.fanfix.supported; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.net.MalformedURLException; +import java.net.URL; + +import be.nikiroo.fanfix.Instance; +import be.nikiroo.fanfix.bundles.Config; +import be.nikiroo.utils.Image; + +/** + * 
Helper class for {@link BasicSupport}, mostly dedicated to text formating for + * the classes that implement {@link BasicSupport}. + * + * @author niki + */ +class BasicSupportHelper { + /** + * Get the default cover related to this subject (see .info files). + * + * @param subject + * the subject + * + * @return the cover if any, or NULL + */ + public static Image getDefaultCover(String subject) { + if (subject != null && !subject.isEmpty() + && Instance.getCoverDir() != null) { + try { + File fileCover = new File(Instance.getCoverDir(), subject); + return getImage(null, fileCover.toURI().toURL(), subject); + } catch (MalformedURLException e) { + } + } + + return null; + } + + /** + * Return the list of supported image extensions. + * + * @param emptyAllowed + * TRUE to allow an empty extension on first place, which can be + * used when you may already have an extension in your input but + * are not sure about it + * + * @return the extensions + */ + public static String[] getImageExt(boolean emptyAllowed) { + if (emptyAllowed) { + return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" }; + } + + return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" }; + } + + /** + * Check if the given resource can be a local image or a remote image, then + * refresh the cache with it if it is. 
+ * + * @param support + * the linked {@link BasicSupport} + * @param source + * the story source + * @param line + * the resource to check + * + * @return the image if found, or NULL + * + */ + public static Image getImage(BasicSupport support, URL source, String line) { + URL url = getImageUrl(support, source, line); + if (url != null) { + if ("file".equals(url.getProtocol())) { + if (new File(url.getPath()).isDirectory()) { + return null; + } + } + InputStream in = null; + try { + in = Instance.getCache().open(url, + BasicSupport.getSupport(url), true); + return new Image(in); + } catch (IOException e) { + } finally { + if (in != null) { + try { + in.close(); + } catch (IOException e) { + } + } + } + } + + return null; + } + + /** + * Check if the given resource can be a local image or a remote image, then + * refresh the cache with it if it is. + * + * @param support + * the linked {@link BasicSupport} + * @param source + * the story source + * @param line + * the resource to check + * + * @return the image URL if found, or NULL + * + */ + public static URL getImageUrl(BasicSupport support, URL source, String line) { + URL url = null; + + if (line != null) { + // try for files + if (source != null) { + try { + + String relPath = null; + String absPath = null; + try { + String path = new File(source.getFile()).getParent(); + relPath = new File(new File(path), line.trim()) + .getAbsolutePath(); + } catch (Exception e) { + // Cannot be converted to path (one possibility to take + // into account: absolute path on Windows) + } + try { + absPath = new File(line.trim()).getAbsolutePath(); + } catch (Exception e) { + // Cannot be converted to path (at all) + } + + for (String ext : getImageExt(true)) { + File absFile = new File(absPath + ext); + File relFile = new File(relPath + ext); + if (absPath != null && absFile.exists() + && absFile.isFile()) { + url = absFile.toURI().toURL(); + } else if (relPath != null && relFile.exists() + && relFile.isFile()) { + url = 
relFile.toURI().toURL(); + } + } + } catch (Exception e) { + // Should not happen since we control the correct arguments + } + } + + if (url == null) { + // try for URLs + try { + for (String ext : getImageExt(true)) { + if (Instance.getCache() + .check(new URL(line + ext), true)) { + url = new URL(line + ext); + break; + } + } + + // try out of cache + if (url == null) { + for (String ext : getImageExt(true)) { + try { + url = new URL(line + ext); + Instance.getCache().refresh(url, support, true); + break; + } catch (IOException e) { + // no image with this ext + url = null; + } + } + } + } catch (MalformedURLException e) { + // Not an url + } + } + + // refresh the cached file + if (url != null) { + try { + Instance.getCache().refresh(url, support, true); + } catch (IOException e) { + // woops, broken image + url = null; + } + } + } + + return url; + } + + /** + * Fix the author name if it is prefixed with some "by" {@link String}. + * + * @param author + * the author with a possible prefix + * + * @return the author without prefixes + */ + public static String fixAuthor(String author) { + if (author != null) { + for (String suffix : new String[] { " ", ":" }) { + for (String byString : Instance.getConfig() + .getString(Config.BYS).split(",")) { + byString += suffix; + if (author.toUpperCase().startsWith(byString.toUpperCase())) { + author = author.substring(byString.length()).trim(); + } + } + } + + // Special case (without suffix): + if (author.startsWith("©")) { + author = author.substring(1); + } + } + + return author; + } +} diff --git a/src/be/nikiroo/fanfix/supported/BasicSupportPara.java b/src/be/nikiroo/fanfix/supported/BasicSupportPara.java new file mode 100644 index 00000000..c098d110 --- /dev/null +++ b/src/be/nikiroo/fanfix/supported/BasicSupportPara.java @@ -0,0 +1,571 @@ +package be.nikiroo.fanfix.supported; + +import java.io.BufferedReader; +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStreamReader; 
+import java.net.URL; +import java.util.ArrayList; +import java.util.List; + +import be.nikiroo.fanfix.Instance; +import be.nikiroo.fanfix.bundles.Config; +import be.nikiroo.fanfix.bundles.StringId; +import be.nikiroo.fanfix.data.Chapter; +import be.nikiroo.fanfix.data.Paragraph; +import be.nikiroo.fanfix.data.Paragraph.ParagraphType; +import be.nikiroo.utils.Image; +import be.nikiroo.utils.Progress; +import be.nikiroo.utils.StringUtils; + +/** + * Helper class for {@link BasicSupport}, mostly dedicated to {@link Paragraph} + * and text formating for the {@link BasicSupport} class itself (not its + * children). + * + * @author niki + */ +class BasicSupportPara { + // quote chars + private static char openQuote = Instance.getTrans().getCharacter( + StringId.OPEN_SINGLE_QUOTE); + private static char closeQuote = Instance.getTrans().getCharacter( + StringId.CLOSE_SINGLE_QUOTE); + private static char openDoubleQuote = Instance.getTrans().getCharacter( + StringId.OPEN_DOUBLE_QUOTE); + private static char closeDoubleQuote = Instance.getTrans().getCharacter( + StringId.CLOSE_DOUBLE_QUOTE); + + /** + * Create a {@link Chapter} object from the given information, formatting + * the content as it should be. + * + * @param support + * the linked {@link BasicSupport} + * @param source + * the source of the story + * @param number + * the chapter number + * @param name + * the chapter name + * @param content + * the chapter content + * @param pg + * the optional progress reporter + * @param html + * TRUE if the input content is in HTML mode + * + * @return the {@link Chapter} + * + * @throws IOException + * in case of I/O error + */ + public static Chapter makeChapter(BasicSupport support, URL source, + int number, String name, String content, boolean html, Progress pg) + throws IOException { + // Chapter name: process it correctly, then remove the possible + // redundant "Chapter x: " in front of it, or "-" (as in + // "Chapter 5: - Fun!" 
after the ": " was automatically added) + String chapterName = BasicSupportPara.processPara(name, false) + .getContent().trim(); + for (String lang : Instance.getConfig().getString(Config.CHAPTER) + .split(",")) { + String chapterWord = Instance.getConfig().getStringX( + Config.CHAPTER, lang); + if (chapterName.startsWith(chapterWord)) { + chapterName = chapterName.substring(chapterWord.length()) + .trim(); + break; + } + } + + if (chapterName.startsWith(Integer.toString(number))) { + chapterName = chapterName.substring( + Integer.toString(number).length()).trim(); + } + + while (chapterName.startsWith(":") || chapterName.startsWith("-")) { + chapterName = chapterName.substring(1).trim(); + } + // + + Chapter chap = new Chapter(number, chapterName); + + if (content != null) { + List paras = makeParagraphs(support, source, content, + html, pg); + long words = 0; + for (Paragraph para : paras) { + words += para.getWords(); + } + chap.setParagraphs(paras); + chap.setWords(words); + } + + return chap; + } + + /** + * Check quotes for bad format (i.e., quotes with normal paragraphs inside) + * and requotify them (i.e., separate them into QUOTE paragraphs and other + * paragraphs (quotes or not)). 
+ * + * @param para + * the paragraph to requotify (not necessarily a quote) + * @param html + * TRUE if the input content is in HTML mode + * + * @return the correctly (or so we hope) quotified paragraphs + */ + private static List requotify(Paragraph para, boolean html) { + List newParas = new ArrayList(); + + if (para.getType() == ParagraphType.QUOTE + && para.getContent().length() > 2) { + String line = para.getContent(); + boolean singleQ = line.startsWith("" + openQuote); + boolean doubleQ = line.startsWith("" + openDoubleQuote); + + // Do not try when more than one quote at a time + // (some stories are not easily readable if we do) + if (singleQ + && line.indexOf(closeQuote, 1) < line + .lastIndexOf(closeQuote)) { + newParas.add(para); + return newParas; + } + if (doubleQ + && line.indexOf(closeDoubleQuote, 1) < line + .lastIndexOf(closeDoubleQuote)) { + newParas.add(para); + return newParas; + } + // + + if (!singleQ && !doubleQ) { + line = openDoubleQuote + line + closeDoubleQuote; + newParas.add(new Paragraph(ParagraphType.QUOTE, line, para + .getWords())); + } else { + char open = singleQ ? openQuote : openDoubleQuote; + char close = singleQ ? closeQuote : closeDoubleQuote; + + int posDot = -1; + boolean inQuote = false; + int i = 0; + for (char car : line.toCharArray()) { + if (car == open) { + inQuote = true; + } else if (car == close) { + inQuote = false; + } else if (car == '.' 
&& !inQuote) { + posDot = i; + break; + } + i++; + } + + if (posDot >= 0) { + String rest = line.substring(posDot + 1).trim(); + line = line.substring(0, posDot + 1).trim(); + long words = 1; + for (char car : line.toCharArray()) { + if (car == ' ') { + words++; + } + } + newParas.add(new Paragraph(ParagraphType.QUOTE, line, words)); + if (!rest.isEmpty()) { + newParas.addAll(requotify(processPara(rest, html), html)); + } + } else { + newParas.add(para); + } + } + } else { + newParas.add(para); + } + + return newParas; + } + + /** + * Process a {@link Paragraph} from a raw line of text. + *

+ * Will also fix quotes and HTML encoding if needed. + * + * @param line + * the raw line + * @param html + * TRUE if the input content is in HTML mode + * + * @return the processed {@link Paragraph} + */ + private static Paragraph processPara(String line, boolean html) { + if (html) { + line = StringUtils.unhtml(line).trim(); + } + boolean space = true; + boolean brk = true; + boolean quote = false; + boolean tentativeCloseQuote = false; + char prev = '\0'; + int dashCount = 0; + long words = 1; + + StringBuilder builder = new StringBuilder(); + for (char car : line.toCharArray()) { + if (car != '-') { + if (dashCount > 0) { + // dash, ndash and mdash: - – — + // currently: always use mdash + builder.append(dashCount == 1 ? '-' : '—'); + } + dashCount = 0; + } + + if (tentativeCloseQuote) { + tentativeCloseQuote = false; + if (Character.isLetterOrDigit(car)) { + builder.append("'"); + } else { + // handle double-single quotes as double quotes + if (prev == car) { + builder.append(closeDoubleQuote); + continue; + } + + builder.append(closeQuote); + } + } + + switch (car) { + case ' ': // note: unbreakable space + case ' ': + case '\t': + case '\n': // just in case + case '\r': // just in case + if (builder.length() > 0 + && builder.charAt(builder.length() - 1) != ' ') { + words++; + } + builder.append(' '); + break; + + case '\'': + if (space || (brk && quote)) { + quote = true; + // handle double-single quotes as double quotes + if (prev == car) { + builder.deleteCharAt(builder.length() - 1); + builder.append(openDoubleQuote); + } else { + builder.append(openQuote); + } + } else if (prev == ' ' || prev == car) { + // handle double-single quotes as double quotes + if (prev == car) { + builder.deleteCharAt(builder.length() - 1); + builder.append(openDoubleQuote); + } else { + builder.append(openQuote); + } + } else { + // it is a quote ("I'm off") or a 'quote' ("This + // 'good' restaurant"...) 
+ tentativeCloseQuote = true; + } + break; + + case '"': + if (space || (brk && quote)) { + quote = true; + builder.append(openDoubleQuote); + } else if (prev == ' ') { + builder.append(openDoubleQuote); + } else { + builder.append(closeDoubleQuote); + } + break; + + case '-': + if (space) { + quote = true; + } else { + dashCount++; + } + space = false; + break; + + case '*': + case '~': + case '/': + case '\\': + case '<': + case '>': + case '=': + case '+': + case '_': + case '–': + case '—': + space = false; + builder.append(car); + break; + + case '‘': + case '`': + case '‹': + case '﹁': + case '〈': + case '「': + if (space || (brk && quote)) { + quote = true; + builder.append(openQuote); + } else { + // handle double-single quotes as double quotes + if (prev == car) { + builder.deleteCharAt(builder.length() - 1); + builder.append(openDoubleQuote); + } else { + builder.append(openQuote); + } + } + space = false; + brk = false; + break; + + case '’': + case '›': + case '﹂': + case '〉': + case '」': + space = false; + brk = false; + // handle double-single quotes as double quotes + if (prev == car) { + builder.deleteCharAt(builder.length() - 1); + builder.append(closeDoubleQuote); + } else { + builder.append(closeQuote); + } + break; + + case '«': + case '“': + case '﹃': + case '《': + case '『': + if (space || (brk && quote)) { + quote = true; + builder.append(openDoubleQuote); + } else { + builder.append(openDoubleQuote); + } + space = false; + brk = false; + break; + + case '»': + case '”': + case '﹄': + case '》': + case '』': + space = false; + brk = false; + builder.append(closeDoubleQuote); + break; + + default: + space = false; + brk = false; + builder.append(car); + break; + } + + prev = car; + } + + if (tentativeCloseQuote) { + tentativeCloseQuote = false; + builder.append(closeQuote); + } + + line = builder.toString().trim(); + + ParagraphType type = ParagraphType.NORMAL; + if (space) { + type = ParagraphType.BLANK; + } else if (brk) { + type = 
ParagraphType.BREAK; + } else if (quote) { + type = ParagraphType.QUOTE; + } + + return new Paragraph(type, line, words); + } + + /** + * Convert the given content into {@link Paragraph}s. + * + * @param support + * the linked {@link BasicSupport} + * @param source + * the source URL of the story + * @param content + * the textual content + * @param html + * TRUE if the input content is in HTML mode + * @param pg + * the optional progress reporter + * + * @return the {@link Paragraph}s + * + * @throws IOException + * in case of I/O error + */ + private static List makeParagraphs(BasicSupport support, + URL source, String content, boolean html, Progress pg) + throws IOException { + if (pg == null) { + pg = new Progress(); + } + + if (html) { + // Special


processing: + content = content.replaceAll("(
]*>)|(
)|(
)", + "
* * *
"); + } + + List paras = new ArrayList(); + + if (content != null && !content.trim().isEmpty()) { + if (html) { + String[] tab = content.split("(

|

|
|
)"); + pg.setMinMax(0, tab.length); + int i = 1; + for (String line : tab) { + if (line.startsWith("[") && line.endsWith("]")) { + pg.setName("Extracting image " + i); + } + paras.add(makeParagraph(support, source, line.trim(), html)); + pg.setProgress(i++); + } + pg.setName(null); + } else { + List lines = new ArrayList(); + BufferedReader buff = null; + try { + buff = new BufferedReader( + new InputStreamReader(new ByteArrayInputStream( + content.getBytes("UTF-8")), "UTF-8")); + for (String line = buff.readLine(); line != null; line = buff + .readLine()) { + lines.add(line.trim()); + } + } finally { + if (buff != null) { + buff.close(); + } + } + + pg.setMinMax(0, lines.size()); + int i = 0; + for (String line : lines) { + if (line.startsWith("[") && line.endsWith("]")) { + pg.setName("Extracting image " + i); + } + paras.add(makeParagraph(support, source, line, html)); + pg.setProgress(i++); + } + pg.setName(null); + } + + // Check quotes for "bad" format + List newParas = new ArrayList(); + for (Paragraph para : paras) { + newParas.addAll(BasicSupportPara.requotify(para, html)); + } + paras = newParas; + + // Remove double blanks/brks + fixBlanksBreaks(paras); + } + + return paras; + } + + /** + * Convert the given line into a single {@link Paragraph}. 
+ * + * @param support + * the linked {@link BasicSupport} + * @param source + * the source URL of the story + * @param line + * the textual content of the paragraph + * @param html + * TRUE if the input content is in HTML mode + * + * @return the {@link Paragraph} + */ + private static Paragraph makeParagraph(BasicSupport support, URL source, + String line, boolean html) { + Image image = null; + if (line.startsWith("[") && line.endsWith("]")) { + image = BasicSupportHelper.getImage(support, source, line + .substring(1, line.length() - 1).trim()); + } + + if (image != null) { + return new Paragraph(image); + } + + return BasicSupportPara.processPara(line, html); + } + + /** + * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of + * those {@link Paragraph}s. + *
<p>
+ * The resulting list will not contain a starting or trailing blank/break + * nor 2 blanks or breaks following each other. + * + * @param paras + * the list of {@link Paragraph}s to fix + */ + private static void fixBlanksBreaks(List paras) { + boolean space = false; + boolean brk = true; + for (int i = 0; i < paras.size(); i++) { + Paragraph para = paras.get(i); + boolean thisSpace = para.getType() == ParagraphType.BLANK; + boolean thisBrk = para.getType() == ParagraphType.BREAK; + + if (i > 0 && space && thisBrk) { + paras.remove(i - 1); + i--; + } else if ((space || brk) && (thisSpace || thisBrk)) { + paras.remove(i); + i--; + } + + space = thisSpace; + brk = thisBrk; + } + + // Remove blank/brk at start + if (paras.size() > 0 + && (paras.get(0).getType() == ParagraphType.BLANK || paras.get( + 0).getType() == ParagraphType.BREAK)) { + paras.remove(0); + } + + // Remove blank/brk at end + int last = paras.size() - 1; + if (paras.size() > 0 + && (paras.get(last).getType() == ParagraphType.BLANK || paras + .get(last).getType() == ParagraphType.BREAK)) { + paras.remove(last); + } + } +} diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport_Deprecated.java b/src/be/nikiroo/fanfix/supported/BasicSupport_Deprecated.java new file mode 100644 index 00000000..591ba58d --- /dev/null +++ b/src/be/nikiroo/fanfix/supported/BasicSupport_Deprecated.java @@ -0,0 +1,1325 @@ +package be.nikiroo.fanfix.supported; + +import java.io.BufferedReader; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.net.MalformedURLException; +import java.net.URL; +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import java.util.Map.Entry; +import java.util.Scanner; + +import be.nikiroo.fanfix.Instance; +import be.nikiroo.fanfix.bundles.Config; +import be.nikiroo.fanfix.bundles.StringId; +import be.nikiroo.fanfix.data.Chapter; +import 
be.nikiroo.fanfix.data.MetaData; +import be.nikiroo.fanfix.data.Paragraph; +import be.nikiroo.fanfix.data.Paragraph.ParagraphType; +import be.nikiroo.fanfix.data.Story; +import be.nikiroo.utils.Image; +import be.nikiroo.utils.Progress; +import be.nikiroo.utils.StringUtils; + +/** + * DEPRECATED: use the new Jsoup 'Node' system. + *

<p> + * This class is the base class used by the other support classes. It can be + * used outside of this package, and has static methods that you can use to get + * access to the correct support class. + * <p>
+ * It will be used with 'resources' (usually web pages or files). + * + * @author niki + */ +@Deprecated +public abstract class BasicSupport_Deprecated extends BasicSupport { + private InputStream in; + private URL currentReferer; // with only one 'r', as in 'HTTP'... + + // quote chars + private char openQuote = Instance.getTrans().getCharacter( + StringId.OPEN_SINGLE_QUOTE); + private char closeQuote = Instance.getTrans().getCharacter( + StringId.CLOSE_SINGLE_QUOTE); + private char openDoubleQuote = Instance.getTrans().getCharacter( + StringId.OPEN_DOUBLE_QUOTE); + private char closeDoubleQuote = Instance.getTrans().getCharacter( + StringId.CLOSE_DOUBLE_QUOTE); + + // New methods not used in Deprecated mode + @Override + protected String getDesc() throws IOException { + throw new RuntimeException("should not be used by legacy code"); + } + + @Override + protected MetaData getMeta() throws IOException { + throw new RuntimeException("should not be used by legacy code"); + } + + @Override + protected List> getChapters(Progress pg) + throws IOException { + throw new RuntimeException("should not be used by legacy code"); + } + + @Override + protected String getChapterContent(URL chapUrl, int number, Progress pg) + throws IOException { + throw new RuntimeException("should not be used by legacy code"); + } + + @Override + public Story process(Progress pg) throws IOException { + return process(getSource(), pg); + } + + // + + /** + * Return the {@link MetaData} of this story. + * + * @param source + * the source of the story + * @param in + * the input (the main resource) + * + * @return the associated {@link MetaData}, never NULL + * + * @throws IOException + * in case of I/O error + */ + protected abstract MetaData getMeta(URL source, InputStream in) + throws IOException; + + /** + * Return the story description. 
+ * + * @param source + * the source of the story + * @param in + * the input (the main resource) + * + * @return the description + * + * @throws IOException + * in case of I/O error + */ + protected abstract String getDesc(URL source, InputStream in) + throws IOException; + + /** + * Return the list of chapters (name and resource). + * + * @param source + * the source of the story + * @param in + * the input (the main resource) + * @param pg + * the optional progress reporter + * + * @return the chapters + * + * @throws IOException + * in case of I/O error + */ + protected abstract List> getChapters(URL source, + InputStream in, Progress pg) throws IOException; + + /** + * Return the content of the chapter (possibly HTML encoded, if + * {@link BasicSupport_Deprecated#isHtml()} is TRUE). + * + * @param source + * the source of the story + * @param in + * the input (the main resource) + * @param number + * the chapter number + * @param pg + * the optional progress reporter + * + * @return the content + * + * @throws IOException + * in case of I/O error + */ + protected abstract String getChapterContent(URL source, InputStream in, + int number, Progress pg) throws IOException; + + /** + * Process the given story resource into a partially filled {@link Story} + * object containing the name and metadata, except for the description. + * + * @param url + * the story resource + * + * @return the {@link Story} + * + * @throws IOException + * in case of I/O error + */ + public Story processMeta(URL url) throws IOException { + return processMeta(url, true, false, null); + } + + /** + * Process the given story resource into a partially filled {@link Story} + * object containing the name and metadata. 
+ * + * @param url + * the story resource + * @param close + * close "this" and "in" when done + * @param getDesc + * retrieve the description of the story, or not + * @param pg + * the optional progress reporter + * + * @return the {@link Story}, never NULL + * + * @throws IOException + * in case of I/O error + */ + protected Story processMeta(URL url, boolean close, boolean getDesc, + Progress pg) throws IOException { + if (pg == null) { + pg = new Progress(); + } else { + pg.setMinMax(0, 100); + } + + login(); + pg.setProgress(10); + + url = getCanonicalUrl(url); + + setCurrentReferer(url); + + in = openInput(url); // NULL allowed here + try { + preprocess(url, getInput()); + pg.setProgress(30); + + Story story = new Story(); + MetaData meta = getMeta(url, getInput()); + if (meta.getCreationDate() == null + || meta.getCreationDate().isEmpty()) { + meta.setCreationDate(StringUtils.fromTime(new Date().getTime())); + } + story.setMeta(meta); + + pg.setProgress(50); + + if (meta.getCover() == null) { + meta.setCover(getDefaultCover(meta.getSubject())); + } + + pg.setProgress(60); + + if (getDesc) { + String descChapterName = Instance.getTrans().getString( + StringId.DESCRIPTION); + story.getMeta().setResume( + makeChapter(url, 0, descChapterName, + getDesc(url, getInput()), null)); + } + + pg.setProgress(100); + return story; + } finally { + if (close) { + close(); + + if (in != null) { + in.close(); + } + } + } + } + + /** + * Process the given story resource into a fully filled {@link Story} + * object. 
+ * + * @param url + * the story resource + * @param pg + * the optional progress reporter + * + * @return the {@link Story}, never NULL + * + * @throws IOException + * in case of I/O error + */ + protected Story process(URL url, Progress pg) throws IOException { + if (pg == null) { + pg = new Progress(); + } else { + pg.setMinMax(0, 100); + } + + url = getCanonicalUrl(url); + pg.setProgress(1); + try { + Progress pgMeta = new Progress(); + pg.addProgress(pgMeta, 10); + Story story = processMeta(url, false, true, pgMeta); + if (!pgMeta.isDone()) { + pgMeta.setProgress(pgMeta.getMax()); // 10% + } + + pg.setName("Retrieving " + story.getMeta().getTitle()); + + setCurrentReferer(url); + + Progress pgGetChapters = new Progress(); + pg.addProgress(pgGetChapters, 10); + story.setChapters(new ArrayList()); + List> chapters = getChapters(url, getInput(), + pgGetChapters); + if (!pgGetChapters.isDone()) { + pgGetChapters.setProgress(pgGetChapters.getMax()); // 20% + } + + if (chapters != null) { + Progress pgChaps = new Progress("Extracting chapters", 0, + chapters.size() * 300); + pg.addProgress(pgChaps, 80); + + long words = 0; + int i = 1; + for (Entry chap : chapters) { + pgChaps.setName("Extracting chapter " + i); + InputStream chapIn = null; + if (chap.getValue() != null) { + setCurrentReferer(chap.getValue()); + chapIn = Instance.getCache().open(chap.getValue(), + this, false); + } + pgChaps.setProgress(i * 100); + try { + Progress pgGetChapterContent = new Progress(); + Progress pgMakeChapter = new Progress(); + pgChaps.addProgress(pgGetChapterContent, 100); + pgChaps.addProgress(pgMakeChapter, 100); + + String content = getChapterContent(url, chapIn, i, + pgGetChapterContent); + if (!pgGetChapterContent.isDone()) { + pgGetChapterContent.setProgress(pgGetChapterContent + .getMax()); + } + + Chapter cc = makeChapter(url, i, chap.getKey(), + content, pgMakeChapter); + if (!pgMakeChapter.isDone()) { + pgMakeChapter.setProgress(pgMakeChapter.getMax()); + } + + words += 
cc.getWords(); + story.getChapters().add(cc); + story.getMeta().setWords(words); + } finally { + if (chapIn != null) { + chapIn.close(); + } + } + + i++; + } + + pgChaps.setName("Extracting chapters"); + } else { + pg.setProgress(80); + } + + return story; + + } finally { + close(); + + if (in != null) { + in.close(); + } + } + } + + /** + * Prepare the support if needed before processing. + * + * @param source + * the source of the story + * @param in + * the input (the main resource) + * + * @throws IOException + * on I/O error + */ + @SuppressWarnings("unused") + protected void preprocess(URL source, InputStream in) throws IOException { + } + + /** + * Create a {@link Chapter} object from the given information, formatting + * the content as it should be. + * + * @param source + * the source of the story + * @param number + * the chapter number + * @param name + * the chapter name + * @param content + * the chapter content + * @param pg + * the optional progress reporter + * + * @return the {@link Chapter} + * + * @throws IOException + * in case of I/O error + */ + protected Chapter makeChapter(URL source, int number, String name, + String content, Progress pg) throws IOException { + // Chapter name: process it correctly, then remove the possible + // redundant "Chapter x: " in front of it, or "-" (as in + // "Chapter 5: - Fun!" 
after the ": " was automatically added) + String chapterName = processPara(name).getContent().trim(); + for (String lang : Instance.getConfig().getString(Config.CHAPTER) + .split(",")) { + String chapterWord = Instance.getConfig().getStringX( + Config.CHAPTER, lang); + if (chapterName.startsWith(chapterWord)) { + chapterName = chapterName.substring(chapterWord.length()) + .trim(); + break; + } + } + + if (chapterName.startsWith(Integer.toString(number))) { + chapterName = chapterName.substring( + Integer.toString(number).length()).trim(); + } + + while (chapterName.startsWith(":") || chapterName.startsWith("-")) { + chapterName = chapterName.substring(1).trim(); + } + // + + Chapter chap = new Chapter(number, chapterName); + + if (content != null) { + List paras = makeParagraphs(source, content, pg); + long words = 0; + for (Paragraph para : paras) { + words += para.getWords(); + } + chap.setParagraphs(paras); + chap.setWords(words); + } + + return chap; + + } + + /** + * Convert the given content into {@link Paragraph}s. + * + * @param source + * the source URL of the story + * @param content + * the textual content + * @param pg + * the optional progress reporter + * + * @return the {@link Paragraph}s + * + * @throws IOException + * in case of I/O error + */ + protected List makeParagraphs(URL source, String content, + Progress pg) throws IOException { + if (pg == null) { + pg = new Progress(); + } + + if (isHtml()) { + // Special


<HR> processing: + content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)", + "<br/>* * *<br/>
"); + } + + List paras = new ArrayList(); + + if (content != null && !content.trim().isEmpty()) { + if (isHtml()) { + String[] tab = content.split("(

<p>|</p>|<br>|<br/>
)"); + pg.setMinMax(0, tab.length); + int i = 1; + for (String line : tab) { + if (line.startsWith("[") && line.endsWith("]")) { + pg.setName("Extracting image " + i); + } + paras.add(makeParagraph(source, line.trim())); + pg.setProgress(i++); + } + pg.setName(null); + } else { + List lines = new ArrayList(); + BufferedReader buff = null; + try { + buff = new BufferedReader( + new InputStreamReader(new ByteArrayInputStream( + content.getBytes("UTF-8")), "UTF-8")); + for (String line = buff.readLine(); line != null; line = buff + .readLine()) { + lines.add(line.trim()); + } + } finally { + if (buff != null) { + buff.close(); + } + } + + pg.setMinMax(0, lines.size()); + int i = 0; + for (String line : lines) { + if (line.startsWith("[") && line.endsWith("]")) { + pg.setName("Extracting image " + i); + } + paras.add(makeParagraph(source, line)); + pg.setProgress(i++); + } + pg.setName(null); + } + + // Check quotes for "bad" format + List newParas = new ArrayList(); + for (Paragraph para : paras) { + newParas.addAll(requotify(para)); + } + paras = newParas; + + // Remove double blanks/brks + fixBlanksBreaks(paras); + } + + return paras; + } + + /** + * Convert the given line into a single {@link Paragraph}. + * + * @param source + * the source URL of the story + * @param line + * the textual content of the paragraph + * + * @return the {@link Paragraph} + */ + private Paragraph makeParagraph(URL source, String line) { + Image image = null; + if (line.startsWith("[") && line.endsWith("]")) { + image = getImage(this, source, line.substring(1, line.length() - 1) + .trim()); + } + + if (image != null) { + return new Paragraph(image); + } + + return processPara(line); + } + + /** + * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of + * those {@link Paragraph}s. + *
<p>
+ * The resulting list will not contain a starting or trailing blank/break + * nor 2 blanks or breaks following each other. + * + * @param paras + * the list of {@link Paragraph}s to fix + */ + protected void fixBlanksBreaks(List paras) { + boolean space = false; + boolean brk = true; + for (int i = 0; i < paras.size(); i++) { + Paragraph para = paras.get(i); + boolean thisSpace = para.getType() == ParagraphType.BLANK; + boolean thisBrk = para.getType() == ParagraphType.BREAK; + + if (i > 0 && space && thisBrk) { + paras.remove(i - 1); + i--; + } else if ((space || brk) && (thisSpace || thisBrk)) { + paras.remove(i); + i--; + } + + space = thisSpace; + brk = thisBrk; + } + + // Remove blank/brk at start + if (paras.size() > 0 + && (paras.get(0).getType() == ParagraphType.BLANK || paras.get( + 0).getType() == ParagraphType.BREAK)) { + paras.remove(0); + } + + // Remove blank/brk at end + int last = paras.size() - 1; + if (paras.size() > 0 + && (paras.get(last).getType() == ParagraphType.BLANK || paras + .get(last).getType() == ParagraphType.BREAK)) { + paras.remove(last); + } + } + + /** + * Get the default cover related to this subject (see .info files). + * + * @param subject + * the subject + * + * @return the cover if any, or NULL + */ + static Image getDefaultCover(String subject) { + if (subject != null && !subject.isEmpty() + && Instance.getCoverDir() != null) { + try { + File fileCover = new File(Instance.getCoverDir(), subject); + return getImage(null, fileCover.toURI().toURL(), subject); + } catch (MalformedURLException e) { + } + } + + return null; + } + + /** + * Return the list of supported image extensions. 
+ * + * @param emptyAllowed + * TRUE to allow an empty extension on first place, which can be + * used when you may already have an extension in your input but + * are not sure about it + * + * @return the extensions + */ + static String[] getImageExt(boolean emptyAllowed) { + if (emptyAllowed) { + return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" }; + } + + return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" }; + } + + /** + * Check if the given resource can be a local image or a remote image, then + * refresh the cache with it if it is. + * + * @param source + * the story source + * @param line + * the resource to check + * + * @return the image if found, or NULL + * + */ + static Image getImage(BasicSupport_Deprecated support, URL source, + String line) { + URL url = getImageUrl(support, source, line); + if (url != null) { + if ("file".equals(url.getProtocol())) { + if (new File(url.getPath()).isDirectory()) { + return null; + } + } + InputStream in = null; + try { + in = Instance.getCache().open(url, getSupport(url), true); + return new Image(in); + } catch (IOException e) { + } finally { + if (in != null) { + try { + in.close(); + } catch (IOException e) { + } + } + } + } + + return null; + } + + /** + * Check if the given resource can be a local image or a remote image, then + * refresh the cache with it if it is. 
+ * + * @param source + * the story source + * @param line + * the resource to check + * + * @return the image URL if found, or NULL + * + */ + static URL getImageUrl(BasicSupport_Deprecated support, URL source, + String line) { + URL url = null; + + if (line != null) { + // try for files + if (source != null) { + try { + + String relPath = null; + String absPath = null; + try { + String path = new File(source.getFile()).getParent(); + relPath = new File(new File(path), line.trim()) + .getAbsolutePath(); + } catch (Exception e) { + // Cannot be converted to path (one possibility to take + // into account: absolute path on Windows) + } + try { + absPath = new File(line.trim()).getAbsolutePath(); + } catch (Exception e) { + // Cannot be converted to path (at all) + } + + for (String ext : getImageExt(true)) { + File absFile = new File(absPath + ext); + File relFile = new File(relPath + ext); + if (absPath != null && absFile.exists() + && absFile.isFile()) { + url = absFile.toURI().toURL(); + } else if (relPath != null && relFile.exists() + && relFile.isFile()) { + url = relFile.toURI().toURL(); + } + } + } catch (Exception e) { + // Should not happen since we control the correct arguments + } + } + + if (url == null) { + // try for URLs + try { + for (String ext : getImageExt(true)) { + if (Instance.getCache() + .check(new URL(line + ext), true)) { + url = new URL(line + ext); + break; + } + } + + // try out of cache + if (url == null) { + for (String ext : getImageExt(true)) { + try { + url = new URL(line + ext); + Instance.getCache().refresh(url, support, true); + break; + } catch (IOException e) { + // no image with this ext + url = null; + } + } + } + } catch (MalformedURLException e) { + // Not an url + } + } + + // refresh the cached file + if (url != null) { + try { + Instance.getCache().refresh(url, support, true); + } catch (IOException e) { + // woops, broken image + url = null; + } + } + } + + return url; + } + + /** + * Open the input file that will be 
used through the support. + *
<p>
+ * Can return NULL, in which case you are supposed to work without an + * {@link InputStream}. + * + * @param source + * the source {@link URL} + * + * @return the {@link InputStream} + * + * @throws IOException + * in case of I/O error + */ + protected InputStream openInput(URL source) throws IOException { + return Instance.getCache().open(source, this, false); + } + + /** + * Reset then return {@link BasicSupport_Deprecated#in}. + * + * @return {@link BasicSupport_Deprecated#in} + */ + protected InputStream getInput() { + return reset(in); + } + + /** + * Check quotes for bad format (i.e., quotes with normal paragraphs inside) + * and requotify them (i.e., separate them into QUOTE paragraphs and other + * paragraphs (quotes or not)). + * + * @param para + * the paragraph to requotify (not necessarily a quote) + * + * @return the correctly (or so we hope) quotified paragraphs + */ + protected List requotify(Paragraph para) { + List newParas = new ArrayList(); + + if (para.getType() == ParagraphType.QUOTE + && para.getContent().length() > 2) { + String line = para.getContent(); + boolean singleQ = line.startsWith("" + openQuote); + boolean doubleQ = line.startsWith("" + openDoubleQuote); + + // Do not try when more than one quote at a time + // (some stories are not easily readable if we do) + if (singleQ + && line.indexOf(closeQuote, 1) < line + .lastIndexOf(closeQuote)) { + newParas.add(para); + return newParas; + } + if (doubleQ + && line.indexOf(closeDoubleQuote, 1) < line + .lastIndexOf(closeDoubleQuote)) { + newParas.add(para); + return newParas; + } + // + + if (!singleQ && !doubleQ) { + line = openDoubleQuote + line + closeDoubleQuote; + newParas.add(new Paragraph(ParagraphType.QUOTE, line, para + .getWords())); + } else { + char open = singleQ ? openQuote : openDoubleQuote; + char close = singleQ ? 
closeQuote : closeDoubleQuote; + + int posDot = -1; + boolean inQuote = false; + int i = 0; + for (char car : line.toCharArray()) { + if (car == open) { + inQuote = true; + } else if (car == close) { + inQuote = false; + } else if (car == '.' && !inQuote) { + posDot = i; + break; + } + i++; + } + + if (posDot >= 0) { + String rest = line.substring(posDot + 1).trim(); + line = line.substring(0, posDot + 1).trim(); + long words = 1; + for (char car : line.toCharArray()) { + if (car == ' ') { + words++; + } + } + newParas.add(new Paragraph(ParagraphType.QUOTE, line, words)); + if (!rest.isEmpty()) { + newParas.addAll(requotify(processPara(rest))); + } + } else { + newParas.add(para); + } + } + } else { + newParas.add(para); + } + + return newParas; + } + + /** + * Process a {@link Paragraph} from a raw line of text. + *
<p>
+ * Will also fix quotes and HTML encoding if needed. + * + * @param line + * the raw line + * + * @return the processed {@link Paragraph} + */ + protected Paragraph processPara(String line) { + line = ifUnhtml(line).trim(); + + boolean space = true; + boolean brk = true; + boolean quote = false; + boolean tentativeCloseQuote = false; + char prev = '\0'; + int dashCount = 0; + long words = 1; + + StringBuilder builder = new StringBuilder(); + for (char car : line.toCharArray()) { + if (car != '-') { + if (dashCount > 0) { + // dash, ndash and mdash: - – — + // currently: always use mdash + builder.append(dashCount == 1 ? '-' : '—'); + } + dashCount = 0; + } + + if (tentativeCloseQuote) { + tentativeCloseQuote = false; + if (Character.isLetterOrDigit(car)) { + builder.append("'"); + } else { + // handle double-single quotes as double quotes + if (prev == car) { + builder.append(closeDoubleQuote); + continue; + } + + builder.append(closeQuote); + } + } + + switch (car) { + case ' ': // note: unbreakable space + case ' ': + case '\t': + case '\n': // just in case + case '\r': // just in case + if (builder.length() > 0 + && builder.charAt(builder.length() - 1) != ' ') { + words++; + } + builder.append(' '); + break; + + case '\'': + if (space || (brk && quote)) { + quote = true; + // handle double-single quotes as double quotes + if (prev == car) { + builder.deleteCharAt(builder.length() - 1); + builder.append(openDoubleQuote); + } else { + builder.append(openQuote); + } + } else if (prev == ' ' || prev == car) { + // handle double-single quotes as double quotes + if (prev == car) { + builder.deleteCharAt(builder.length() - 1); + builder.append(openDoubleQuote); + } else { + builder.append(openQuote); + } + } else { + // it is a quote ("I'm off") or a 'quote' ("This + // 'good' restaurant"...) 
+ tentativeCloseQuote = true; + } + break; + + case '"': + if (space || (brk && quote)) { + quote = true; + builder.append(openDoubleQuote); + } else if (prev == ' ') { + builder.append(openDoubleQuote); + } else { + builder.append(closeDoubleQuote); + } + break; + + case '-': + if (space) { + quote = true; + } else { + dashCount++; + } + space = false; + break; + + case '*': + case '~': + case '/': + case '\\': + case '<': + case '>': + case '=': + case '+': + case '_': + case '–': + case '—': + space = false; + builder.append(car); + break; + + case '‘': + case '`': + case '‹': + case '﹁': + case '〈': + case '「': + if (space || (brk && quote)) { + quote = true; + builder.append(openQuote); + } else { + // handle double-single quotes as double quotes + if (prev == car) { + builder.deleteCharAt(builder.length() - 1); + builder.append(openDoubleQuote); + } else { + builder.append(openQuote); + } + } + space = false; + brk = false; + break; + + case '’': + case '›': + case '﹂': + case '〉': + case '」': + space = false; + brk = false; + // handle double-single quotes as double quotes + if (prev == car) { + builder.deleteCharAt(builder.length() - 1); + builder.append(closeDoubleQuote); + } else { + builder.append(closeQuote); + } + break; + + case '«': + case '“': + case '﹃': + case '《': + case '『': + if (space || (brk && quote)) { + quote = true; + builder.append(openDoubleQuote); + } else { + builder.append(openDoubleQuote); + } + space = false; + brk = false; + break; + + case '»': + case '”': + case '﹄': + case '》': + case '』': + space = false; + brk = false; + builder.append(closeDoubleQuote); + break; + + default: + space = false; + brk = false; + builder.append(car); + break; + } + + prev = car; + } + + if (tentativeCloseQuote) { + tentativeCloseQuote = false; + builder.append(closeQuote); + } + + line = builder.toString().trim(); + + ParagraphType type = ParagraphType.NORMAL; + if (space) { + type = ParagraphType.BLANK; + } else if (brk) { + type = 
ParagraphType.BREAK; + } else if (quote) { + type = ParagraphType.QUOTE; + } + + return new Paragraph(type, line, words); + } + + /** + * Remove the HTML from the input if + * {@link BasicSupport_Deprecated#isHtml()} is true. + * + * @param input + * the input + * + * @return the no html version if needed + */ + private String ifUnhtml(String input) { + if (isHtml() && input != null) { + return StringUtils.unhtml(input); + } + + return input; + } + + /** + * Reset the given {@link InputStream} and return it. + * + * @param in + * the {@link InputStream} to reset + * + * @return the same {@link InputStream} after reset + */ + static protected InputStream reset(InputStream in) { + try { + if (in != null) { + in.reset(); + } + } catch (IOException e) { + } + + return in; + } + + /** + * Return the first line from the given input which correspond to the given + * selectors. + * + * @param in + * the input + * @param needle + * a string that must be found inside the target line (also + * supports "^" at start to say "only if it starts with" the + * needle) + * @param relativeLine + * the line to return based upon the target line position (-1 = + * the line before, 0 = the target line...) + * + * @return the line + */ + static protected String getLine(InputStream in, String needle, + int relativeLine) { + return getLine(in, needle, relativeLine, true); + } + + /** + * Return a line from the given input which correspond to the given + * selectors. + * + * @param in + * the input + * @param needle + * a string that must be found inside the target line (also + * supports "^" at start to say "only if it starts with" the + * needle) + * @param relativeLine + * the line to return based upon the target line position (-1 = + * the line before, 0 = the target line...) 
+ * @param first + * takes the first result (as opposed to the last one, which will + * also always spend the input) + * + * @return the line + */ + static protected String getLine(InputStream in, String needle, + int relativeLine, boolean first) { + String rep = null; + + reset(in); + + List lines = new ArrayList(); + @SuppressWarnings("resource") + Scanner scan = new Scanner(in, "UTF-8"); + int index = -1; + scan.useDelimiter("\\n"); + while (scan.hasNext()) { + lines.add(scan.next()); + + if (index == -1) { + if (needle.startsWith("^")) { + if (lines.get(lines.size() - 1).startsWith( + needle.substring(1))) { + index = lines.size() - 1; + } + + } else { + if (lines.get(lines.size() - 1).contains(needle)) { + index = lines.size() - 1; + } + } + } + + if (index >= 0 && index + relativeLine < lines.size()) { + rep = lines.get(index + relativeLine); + if (first) { + break; + } + } + } + + return rep; + } + + /** + * Return the text between the key and the endKey (and optional subKey can + * be passed, in this case we will look for the key first, then take the + * text between the subKey and the endKey). + *
<p>
+ * Will only match the first line with the given key if more than one are + * possible. Which also means that if the subKey or endKey is not found on + * that line, NULL will be returned. + * + * @param in + * the input + * @param key + * the key to match (also supports "^" at start to say + * "only if it starts with" the key) + * @param subKey + * the sub key or NULL if none + * @param endKey + * the end key or NULL for "up to the end" + * @return the text or NULL if not found + */ + static protected String getKeyLine(InputStream in, String key, + String subKey, String endKey) { + return getKeyText(getLine(in, key, 0), key, subKey, endKey); + } + + /** + * Return the text between the key and the endKey (and optional subKey can + * be passed, in this case we will look for the key first, then take the + * text between the subKey and the endKey). + * + * @param in + * the input + * @param key + * the key to match (also supports "^" at start to say + * "only if it starts with" the key) + * @param subKey + * the sub key or NULL if none + * @param endKey + * the end key or NULL for "up to the end" + * @return the text or NULL if not found + */ + static protected String getKeyText(String in, String key, String subKey, + String endKey) { + String result = null; + + String line = in; + if (line != null && line.contains(key)) { + line = line.substring(line.indexOf(key) + key.length()); + if (subKey == null || subKey.isEmpty() || line.contains(subKey)) { + if (subKey != null) { + line = line.substring(line.indexOf(subKey) + + subKey.length()); + } + if (endKey == null || line.contains(endKey)) { + if (endKey != null) { + line = line.substring(0, line.indexOf(endKey)); + result = line; + } + } + } + } + + return result; + } + + /** + * Return the text between the key and the endKey (optional subKeys can be + * passed, in this case we will look for the subKeys first, then take the + * text between the key and the endKey). 
+ * + * @param in + * the input + * @param key + * the key to match + * @param endKey + * the end key or NULL for "up to the end" + * @param afters + * the sub-keys to find before checking for key/endKey + * + * @return the text or NULL if not found + */ + static protected String getKeyTextAfter(String in, String key, + String endKey, String... afters) { + + if (in != null && !in.isEmpty()) { + int pos = indexOfAfter(in, 0, afters); + if (pos < 0) { + return null; + } + + in = in.substring(pos); + } + + return getKeyText(in, key, null, endKey); + } + + /** + * Return the first index after all the given "afters" have been found in + * the {@link String}, or -1 if it was not possible. + * + * @param in + * the input + * @param startAt + * start at this position in the string + * @param afters + * the sub-keys to find before checking for key/endKey + * + * @return the text or NULL if not found + */ + static protected int indexOfAfter(String in, int startAt, String... afters) { + int pos = -1; + if (in != null && !in.isEmpty()) { + pos = startAt; + if (afters != null) { + for (int i = 0; pos >= 0 && i < afters.length; i++) { + String subKey = afters[i]; + if (!subKey.isEmpty()) { + pos = in.indexOf(subKey, pos); + if (pos >= 0) { + pos += subKey.length(); + } + } + } + } + } + + return pos; + } +} diff --git a/src/be/nikiroo/fanfix/supported/E621.java b/src/be/nikiroo/fanfix/supported/E621.java index 72c68b1a..5fe99a8f 100644 --- a/src/be/nikiroo/fanfix/supported/E621.java +++ b/src/be/nikiroo/fanfix/supported/E621.java @@ -26,7 +26,7 @@ import be.nikiroo.utils.StringUtils; * * @author niki */ -class E621 extends BasicSupport { +class E621 extends BasicSupport_Deprecated { @Override public String getSourceName() { return "e621.net"; diff --git a/src/be/nikiroo/fanfix/supported/EHentai.java b/src/be/nikiroo/fanfix/supported/EHentai.java index c082b914..574ea065 100644 --- a/src/be/nikiroo/fanfix/supported/EHentai.java +++ b/src/be/nikiroo/fanfix/supported/EHentai.java 
@@ -23,7 +23,7 @@ import be.nikiroo.utils.StringUtils; * * @author niki */ -class EHentai extends BasicSupport { +class EHentai extends BasicSupport_Deprecated { @Override public String getSourceName() { return "e-hentai.org"; diff --git a/src/be/nikiroo/fanfix/supported/Epub.java b/src/be/nikiroo/fanfix/supported/Epub.java index ba56cfb7..9b06f202 100644 --- a/src/be/nikiroo/fanfix/supported/Epub.java +++ b/src/be/nikiroo/fanfix/supported/Epub.java @@ -200,7 +200,7 @@ class Epub extends InfoText { } @Override - protected void close() throws IOException { + protected void close() { if (tmp != null && tmp.exists()) { if (!tmp.delete()) { tmp.deleteOnExit(); @@ -210,7 +210,11 @@ class Epub extends InfoText { tmp = null; if (fakeIn != null) { - fakeIn.close(); + try { + fakeIn.close(); + } catch (Exception e) { + Instance.getTraceHandler().error(e); + } } super.close(); diff --git a/src/be/nikiroo/fanfix/supported/Fanfiction.java b/src/be/nikiroo/fanfix/supported/Fanfiction.java index b80156d4..0feb9641 100644 --- a/src/be/nikiroo/fanfix/supported/Fanfiction.java +++ b/src/be/nikiroo/fanfix/supported/Fanfiction.java @@ -25,7 +25,7 @@ import be.nikiroo.utils.StringUtils; * * @author niki */ -class Fanfiction extends BasicSupport { +class Fanfiction extends BasicSupport_Deprecated { @Override protected boolean isHtml() { return true; @@ -142,7 +142,7 @@ class Fanfiction extends BasicSupport { } } - return fixAuthor(author); + return BasicSupportHelper.fixAuthor(author); } private String getDate(InputStream in) { diff --git a/src/be/nikiroo/fanfix/supported/Fimfiction.java b/src/be/nikiroo/fanfix/supported/Fimfiction.java index 7d2eec44..ed9c9adf 100644 --- a/src/be/nikiroo/fanfix/supported/Fimfiction.java +++ b/src/be/nikiroo/fanfix/supported/Fimfiction.java @@ -23,7 +23,7 @@ import be.nikiroo.utils.StringUtils; * * @author niki */ -class Fimfiction extends BasicSupport { +class Fimfiction extends BasicSupport_Deprecated { @Override protected boolean isHtml() { return 
true; diff --git a/src/be/nikiroo/fanfix/supported/FimfictionApi.java b/src/be/nikiroo/fanfix/supported/FimfictionApi.java index ee436f16..231405b9 100644 --- a/src/be/nikiroo/fanfix/supported/FimfictionApi.java +++ b/src/be/nikiroo/fanfix/supported/FimfictionApi.java @@ -23,7 +23,7 @@ import be.nikiroo.utils.Progress; * * @author niki */ -class FimfictionApi extends BasicSupport { +class FimfictionApi extends BasicSupport_Deprecated { private String oauth; private String storyId; private String json; diff --git a/src/be/nikiroo/fanfix/supported/Html.java b/src/be/nikiroo/fanfix/supported/Html.java index fffbcd7f..8dec5f75 100644 --- a/src/be/nikiroo/fanfix/supported/Html.java +++ b/src/be/nikiroo/fanfix/supported/Html.java @@ -6,6 +6,8 @@ import java.net.MalformedURLException; import java.net.URISyntaxException; import java.net.URL; +import be.nikiroo.fanfix.Instance; + /** * Support class for HTML files created with this program (as we need some * metadata available in those we create). 
@@ -34,17 +36,17 @@ class Html extends InfoText { } @Override - public URL getCanonicalUrl(URL source) throws IOException { + public URL getCanonicalUrl(URL source) { if (source.toString().endsWith(File.separator + "index.html")) { try { File fakeFile = new File(source.toURI()); // "story/index.html" fakeFile = new File(fakeFile.getParent()); // "story" fakeFile = new File(fakeFile, fakeFile.getName()); // "story/story" return fakeFile.toURI().toURL(); - } catch (URISyntaxException e) { - throw new IOException( - "file not supported (maybe not created with this program or corrupt)", - e); + } catch (Exception e) { + Instance.getTraceHandler().error( + new IOException("Cannot find the right URL for " + + source, e)); } } diff --git a/src/be/nikiroo/fanfix/supported/InfoReader.java b/src/be/nikiroo/fanfix/supported/InfoReader.java index b65256ca..5203cc86 100644 --- a/src/be/nikiroo/fanfix/supported/InfoReader.java +++ b/src/be/nikiroo/fanfix/supported/InfoReader.java @@ -58,7 +58,7 @@ public class InfoReader { if (withCover) { String infoTag = getInfoTag(in, "COVER"); if (infoTag != null && !infoTag.trim().isEmpty()) { - meta.setCover(BasicSupport.getImage(null, sourceInfoFile, + meta.setCover(BasicSupportHelper.getImage(null, sourceInfoFile, infoTag)); } // Second chance: try to check for a cover next to the info file @@ -70,8 +70,8 @@ public class InfoReader { + Instance.getConfig() .getString(Config.IMAGE_FORMAT_COVER) .toLowerCase(); - meta.setCover(BasicSupport.getImage(null, sourceInfoFile, - info + ext)); + meta.setCover(BasicSupportHelper.getImage(null, + sourceInfoFile, info + ext)); } } } @@ -84,7 +84,7 @@ public class InfoReader { meta.setFakeCover(Boolean.parseBoolean(getInfoTag(in, "FAKE_COVER"))); if (withCover && meta.getCover() == null) { - meta.setCover(BasicSupport.getDefaultCover(meta.getSubject())); + meta.setCover(BasicSupportHelper.getDefaultCover(meta.getSubject())); } return meta; @@ -138,7 +138,7 @@ public class InfoReader { if (in != null) 
{ in.reset(); - String value = BasicSupport.getLine(in, key, 0); + String value = BasicSupport_Deprecated.getLine(in, key, 0); if (value != null && !value.isEmpty()) { value = value.trim().substring(key.length() - 1).trim(); if (value.startsWith("'") && value.endsWith("'") diff --git a/src/be/nikiroo/fanfix/supported/MangaFox.java b/src/be/nikiroo/fanfix/supported/MangaFox.java index 8d31cb83..8fc1965d 100644 --- a/src/be/nikiroo/fanfix/supported/MangaFox.java +++ b/src/be/nikiroo/fanfix/supported/MangaFox.java @@ -16,7 +16,7 @@ import be.nikiroo.utils.Image; import be.nikiroo.utils.Progress; import be.nikiroo.utils.StringUtils; -class MangaFox extends BasicSupport { +class MangaFox extends BasicSupport_Deprecated { @Override protected boolean isHtml() { return true; @@ -376,6 +376,9 @@ class MangaFox extends BasicSupport { @Override protected boolean supports(URL url) { + // Broken code (see MangaFoxNew) + if (true) + return false; return "mangafox.me".equals(url.getHost()) || "www.mangafox.me".equals(url.getHost()); } diff --git a/src/be/nikiroo/fanfix/supported/SupportType.java b/src/be/nikiroo/fanfix/supported/SupportType.java new file mode 100644 index 00000000..2c925629 --- /dev/null +++ b/src/be/nikiroo/fanfix/supported/SupportType.java @@ -0,0 +1,117 @@ +package be.nikiroo.fanfix.supported; + +import be.nikiroo.fanfix.Instance; +import be.nikiroo.fanfix.bundles.StringId; + +/** + * The supported input types for which we can get a {@link BasicSupport} object. 
+ * + * @author niki + */ +public enum SupportType { + /** EPUB files created with this program */ + EPUB, + /** Pure text file with some rules */ + TEXT, + /** TEXT but with associated .info file */ + INFO_TEXT, + /** My Little Pony fanfictions */ + FIMFICTION, + /** Fanfictions from a lot of different universes */ + FANFICTION, + /** Website with lots of Mangas */ + MANGAFOX, + /** Furry website with comics support */ + E621, + /** Furry website with stories */ + YIFFSTAR, + /** Comics and images groups, mostly but not only NSFW */ + E_HENTAI, + /** CBZ files */ + CBZ, + /** HTML files */ + HTML; + + /** + * A description of this support type (more information than the + * {@link BasicSupport#getSourceName()}). + * + * @return the description + */ + public String getDesc() { + String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC, + this.name()); + + if (desc == null) { + desc = Instance.getTrans().getString(StringId.INPUT_DESC, this); + } + + return desc; + } + + /** + * The name of this support type (a short version). + * + * @return the name + */ + public String getSourceName() { + BasicSupport support = BasicSupport.getSupport(this, null); + if (support != null) { + return support.getSourceName(); + } + + return null; + } + + @Override + public String toString() { + return super.toString().toLowerCase(); + } + + /** + * Call {@link SupportType#valueOf(String)} after conversion to upper case. + * + * @param typeName + * the possible type name + * + * @return the type (an exception is raised for a NULL or unknown name) + */ + public static SupportType valueOfUC(String typeName) { + return SupportType.valueOf(typeName == null ? null : typeName + .toUpperCase()); + } + + /** + * Call {@link SupportType#valueOf(String)} after conversion to upper case + * but return NULL for NULL instead of raising an exception.
+ * + * @param typeName + * the possible type name + * + * @return NULL or the type + */ + public static SupportType valueOfNullOkUC(String typeName) { + if (typeName == null) { + return null; + } + + return SupportType.valueOfUC(typeName); + } + + /** + * Call {@link SupportType#valueOf(String)} after conversion to upper case + * but return NULL in case of error instead of raising an exception. + * + * @param typeName + * the possible type name + * + * @return NULL or the type + */ + public static SupportType valueOfAllOkUC(String typeName) { + try { + return SupportType.valueOfUC(typeName); + } catch (Exception e) { + return null; + } + } +} \ No newline at end of file diff --git a/src/be/nikiroo/fanfix/supported/Text.java b/src/be/nikiroo/fanfix/supported/Text.java index 79ee8518..14379113 100644 --- a/src/be/nikiroo/fanfix/supported/Text.java +++ b/src/be/nikiroo/fanfix/supported/Text.java @@ -33,7 +33,7 @@ import be.nikiroo.utils.Progress; * * @author niki */ -class Text extends BasicSupport { +class Text extends BasicSupport_Deprecated { @Override protected boolean isHtml() { return false; @@ -123,7 +123,7 @@ class Text extends BasicSupport { author = authorDate.substring(0, pos); } - return fixAuthor(author); + return BasicSupportHelper.fixAuthor(author); } private String getDate(InputStream in) { diff --git a/src/be/nikiroo/fanfix/supported/YiffStar.java b/src/be/nikiroo/fanfix/supported/YiffStar.java index a612d4f2..ca331e5a 100644 --- a/src/be/nikiroo/fanfix/supported/YiffStar.java +++ b/src/be/nikiroo/fanfix/supported/YiffStar.java @@ -24,7 +24,7 @@ import be.nikiroo.utils.StringUtils; * * @author niki */ -class YiffStar extends BasicSupport { +class YiffStar extends BasicSupport_Deprecated { @Override public String getSourceName() { @@ -93,19 +93,23 @@ class YiffStar extends BasicSupport { } @Override - public URL getCanonicalUrl(URL source) throws IOException { - if (source.getPath().startsWith("/view")) { - source = new URL(source.toString() + 
"/guest"); - InputStream in = Instance.getCache().open(source, this, false); - String line = getLine(in, "/browse/folder/", 0); - if (line != null) { - String[] tab = line.split("\""); - if (tab.length > 1) { - String groupUrl = source.getProtocol() + "://" - + source.getHost() + tab[1]; - return guest(groupUrl); + public URL getCanonicalUrl(URL source) { + try { + if (source.getPath().startsWith("/view")) { + source = new URL(source.toString() + "/guest"); + InputStream in = Instance.getCache().open(source, this, false); + String line = getLine(in, "/browse/folder/", 0); + if (line != null) { + String[] tab = line.split("\""); + if (tab.length > 1) { + String groupUrl = source.getProtocol() + "://" + + source.getHost() + tab[1]; + return guest(groupUrl); + } } } + } catch (Exception e) { + Instance.getTraceHandler().error(e); } return super.getCanonicalUrl(source); diff --git a/src/be/nikiroo/fanfix/test/BasicSupportTest.java b/src/be/nikiroo/fanfix/test/BasicSupportTest.java index dba0ef02..a3f5221d 100644 --- a/src/be/nikiroo/fanfix/test/BasicSupportTest.java +++ b/src/be/nikiroo/fanfix/test/BasicSupportTest.java @@ -15,7 +15,8 @@ import be.nikiroo.fanfix.data.Paragraph; import be.nikiroo.fanfix.data.Paragraph.ParagraphType; import be.nikiroo.fanfix.data.Story; import be.nikiroo.fanfix.supported.BasicSupport; -import be.nikiroo.fanfix.supported.BasicSupport.SupportType; +import be.nikiroo.fanfix.supported.BasicSupport_Deprecated; +import be.nikiroo.fanfix.supported.SupportType; import be.nikiroo.utils.IOUtils; import be.nikiroo.utils.Progress; import be.nikiroo.utils.test.TestCase; @@ -334,11 +335,10 @@ class BasicSupportTest extends TestLauncher { @Override public void test() throws Exception { - BasicSupport support = BasicSupport - .getSupport(SupportType.TEXT); + BasicSupport support = BasicSupport.getSupport( + SupportType.TEXT, tmp.toURI().toURL()); - Story story = support - .process(tmp.toURI().toURL(), null); + Story story = support.process(null); 
assertEquals(2, story.getChapters().size()); assertEquals(1, story.getChapters().get(1) @@ -377,11 +377,10 @@ class BasicSupportTest extends TestLauncher { @Override public void test() throws Exception { - BasicSupport support = BasicSupport - .getSupport(SupportType.TEXT); + BasicSupport support = BasicSupport.getSupport( + SupportType.TEXT, tmp.toURI().toURL()); - Story story = support - .process(tmp.toURI().toURL(), null); + Story story = support.process(null); assertEquals(2, story.getChapters().size()); assertEquals(1, story.getChapters().get(1) @@ -394,7 +393,7 @@ class BasicSupportTest extends TestLauncher { }); } - private class BasicSupportEmpty extends BasicSupport { + private class BasicSupportEmpty extends BasicSupport_Deprecated { @Override protected String getSourceName() { return null;