Nécessaires :
- libs/nikiroo-utils-sources.jar: quelques utilitaires partagés
- [libs/unbescape-sources.jar](https://github.com/unbescape/unbescape): une librairie sympathique pour convertir du texte depuis/vers beaucoup de formats ; utilisée ici pour la partie HTML
+- [libs/jsoup-sources.jar](https://jsoup.org/): une librairie pour parser du HTML
Optionnelles :
- [libs/jexer-sources.jar](https://github.com/klamonte/jexer): une petite librairie qui offre des widgets en mode TUI
- libs/nikiroo-utils-sources.jar: some shared utility functions
- [libs/unbescape-sources.jar](https://github.com/unbescape/unbescape): a nice library to escape/unescape a lot of text formats; used here for HTML
- [libs/jexer-sources.jar](https://github.com/klamonte/jexer): a small library that offers TUI widgets
+- [libs/jsoup-sources.jar](https://jsoup.org/): a library to parse HTML
Nothing else but Java 1.6+.
import be.nikiroo.fanfix.reader.Reader;
import be.nikiroo.fanfix.reader.Reader.ReaderType;
import be.nikiroo.fanfix.supported.BasicSupport;
-import be.nikiroo.fanfix.supported.BasicSupport.SupportType;
+import be.nikiroo.fanfix.supported.SupportType;
import be.nikiroo.utils.Progress;
import be.nikiroo.utils.Version;
import be.nikiroo.utils.serial.server.ServerObject;
pg.addProgress(pgOut, 1);
}
- Story story = support.process(source, pgIn);
+ Story story = support.process(pgIn);
try {
target = new File(target).getAbsolutePath();
BasicOutput.getOutput(type, infoCover, infoCover)
*/
public Locale getLanguage() {
return getLocaleFor(lang);
-
}
/**
import be.nikiroo.fanfix.output.BasicOutput;
import be.nikiroo.fanfix.output.BasicOutput.OutputType;
import be.nikiroo.fanfix.supported.BasicSupport;
-import be.nikiroo.fanfix.supported.BasicSupport.SupportType;
+import be.nikiroo.fanfix.supported.SupportType;
import be.nikiroo.utils.Image;
import be.nikiroo.utils.Progress;
.getType());
URL url = file.toURI().toURL();
if (type != null) {
- story = BasicSupport.getSupport(type).process(url,
- pgProcess);
+ story = BasicSupport.getSupport(type, url) //
+ .process(pgProcess);
// Because we do not want to clear the meta cache:
meta.setCover(story.getMeta().getCover());
story.setMeta(meta);
throw new UnknownHostException("" + url);
}
- return save(support.process(url, pg), null);
+ return save(support.process(pg), null);
}
/**
throw new IOException("URL not supported: " + source.toString());
}
- story = support.process(source, pg);
+ story = support.process(pg);
if (story == null) {
throw new IOException(
"Cannot retrieve story from external source: "
package be.nikiroo.fanfix.supported;
-import java.io.BufferedReader;
-import java.io.ByteArrayInputStream;
-import java.io.File;
import java.io.IOException;
import java.io.InputStream;
-import java.io.InputStreamReader;
-import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
-import java.util.Scanner;
+
+import org.jsoup.helper.DataUtil;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
import be.nikiroo.fanfix.Instance;
-import be.nikiroo.fanfix.bundles.Config;
import be.nikiroo.fanfix.bundles.StringId;
import be.nikiroo.fanfix.data.Chapter;
import be.nikiroo.fanfix.data.MetaData;
-import be.nikiroo.fanfix.data.Paragraph;
-import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
import be.nikiroo.fanfix.data.Story;
-import be.nikiroo.utils.Image;
import be.nikiroo.utils.Progress;
import be.nikiroo.utils.StringUtils;
* @author niki
*/
public abstract class BasicSupport {
- /**
- * The supported input types for which we can get a {@link BasicSupport}
- * object.
- *
- * @author niki
- */
- public enum SupportType {
- /** EPUB files created with this program */
- EPUB,
- /** Pure text file with some rules */
- TEXT,
- /** TEXT but with associated .info file */
- INFO_TEXT,
- /** My Little Pony fanfictions */
- FIMFICTION,
- /** Fanfictions from a lot of different universes */
- FANFICTION,
- /** Website with lots of Mangas */
- MANGAFOX,
- /** Furry website with comics support */
- E621,
- /** Furry website with stories */
- YIFFSTAR,
- /** Comics and images groups, mostly but not only NSFW */
- E_HENTAI,
- /** CBZ files */
- CBZ,
- /** HTML files */
- HTML;
-
- /**
- * A description of this support type (more information than the
- * {@link BasicSupport#getSourceName()}).
- *
- * @return the description
- */
- public String getDesc() {
- String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
- this.name());
-
- if (desc == null) {
- desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
- }
-
- return desc;
- }
-
- /**
- * The name of this support type (a short version).
- *
- * @return the name
- */
- public String getSourceName() {
- BasicSupport support = BasicSupport.getSupport(this);
- if (support != null) {
- return support.getSourceName();
- }
-
- return null;
- }
-
- @Override
- public String toString() {
- return super.toString().toLowerCase();
- }
-
- /**
- * Call {@link SupportType#valueOf(String)} after conversion to upper
- * case.
- *
- * @param typeName
- * the possible type name
- *
- * @return NULL or the type
- */
- public static SupportType valueOfUC(String typeName) {
- return SupportType.valueOf(typeName == null ? null : typeName
- .toUpperCase());
- }
-
- /**
- * Call {@link SupportType#valueOf(String)} after conversion to upper
- * case but return NULL for NULL instead of raising exception.
- *
- * @param typeName
- * the possible type name
- *
- * @return NULL or the type
- */
- public static SupportType valueOfNullOkUC(String typeName) {
- if (typeName == null) {
- return null;
- }
-
- return SupportType.valueOfUC(typeName);
- }
-
- /**
- * Call {@link SupportType#valueOf(String)} after conversion to upper
- * case but return NULL in case of error instead of raising an
- * exception.
- *
- * @param typeName
- * the possible type name
- *
- * @return NULL or the type
- */
- public static SupportType valueOfAllOkUC(String typeName) {
- try {
- return SupportType.valueOfUC(typeName);
- } catch (Exception e) {
- return null;
- }
- }
- }
-
- private InputStream in;
+ private Document sourceNode;
+ private URL source;
private SupportType type;
private URL currentReferer; // with only one 'r', as in 'HTTP'...
- // quote chars
- private char openQuote = Instance.getTrans().getCharacter(
- StringId.OPEN_SINGLE_QUOTE);
- private char closeQuote = Instance.getTrans().getCharacter(
- StringId.CLOSE_SINGLE_QUOTE);
- private char openDoubleQuote = Instance.getTrans().getCharacter(
- StringId.OPEN_DOUBLE_QUOTE);
- private char closeDoubleQuote = Instance.getTrans().getCharacter(
- StringId.CLOSE_DOUBLE_QUOTE);
-
/**
* The name of this support class.
*
/**
* Return the {@link MetaData} of this story.
*
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
- *
* @return the associated {@link MetaData}, never NULL
*
* @throws IOException
* in case of I/O error
*/
- protected abstract MetaData getMeta(URL source, InputStream in)
- throws IOException;
+ protected abstract MetaData getMeta() throws IOException;
/**
* Return the story description.
*
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
- *
* @return the description
*
* @throws IOException
* in case of I/O error
*/
- protected abstract String getDesc(URL source, InputStream in)
- throws IOException;
+ protected abstract String getDesc() throws IOException;
/**
- * Return the list of chapters (name and resource).
+ * Return the list of chapters (name and resource).
+ * <p>
+ * Can be NULL if this {@link BasicSupport} does not use chapters.
*
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
* @param pg
* the optional progress reporter
*
- * @return the chapters
+ * @return the chapters or NULL
*
* @throws IOException
* in case of I/O error
*/
- protected abstract List<Entry<String, URL>> getChapters(URL source,
- InputStream in, Progress pg) throws IOException;
+ protected abstract List<Entry<String, URL>> getChapters(Progress pg)
+ throws IOException;
/**
* Return the content of the chapter (possibly HTML encoded, if
* {@link BasicSupport#isHtml()} is TRUE).
*
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
+ * @param chapUrl
+ * the chapter {@link URL}
* @param number
* the chapter number
* @param pg
* @throws IOException
* in case of I/O error
*/
- protected abstract String getChapterContent(URL source, InputStream in,
- int number, Progress pg) throws IOException;
-
- /**
- * Log into the support (can be a no-op depending upon the support).
- *
- * @throws IOException
- * in case of I/O error
- */
- @SuppressWarnings("unused")
- public void login() throws IOException {
- }
+ protected abstract String getChapterContent(URL chapUrl, int number,
+ Progress pg) throws IOException;
/**
* Return the list of cookies (values included) that must be used to
* Return the canonical form of the main {@link URL}.
*
* @param source
+ * the source {@link URL}, which can be NULL
+ *
+ * @return the canonical form of this {@link URL} or NULL if the source was
+ * NULL
+ */
+ protected URL getCanonicalUrl(URL source) {
+ return source;
+ }
+
+ /**
+ * The main {@link Node} for this {@link Story}.
+ *
+ * @return the node
+ */
+ protected Element getSourceNode() {
+ return sourceNode;
+ }
+
+ /**
+ * The main {@link URL} for this {@link Story}.
+ *
+ * @return the URL
+ */
+ protected URL getSource() {
+ return source;
+ }
+
+ /**
+ * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
+ * the current {@link URL} we work on.
+ *
+ * @return the referer
+ */
+ public URL getCurrentReferer() {
+ return currentReferer;
+ }
+
+ /**
+ * The current referer {@link URL} (only one 'r', as in 'HTTP'...), i.e.,
+ * the current {@link URL} we work on.
+ *
+ * @param currentReferer
+ * the new referer
+ */
+ protected void setCurrentReferer(URL currentReferer) {
+ this.currentReferer = currentReferer;
+ }
+
+ /**
+ * The support type.
+ *
+ * @return the type
+ */
+ public SupportType getType() {
+ return type;
+ }
+
+ /**
+ * The support type.
+ *
+ * @param type
+ * the new type
+ */
+ protected void setType(SupportType type) {
+ this.type = type;
+ }
+
+ /**
+ * Open the source and parse it into the {@link Document} that will be used
+ * for the support.
+ * <p>
+ * Can return NULL, in which case you are supposed to work without a
+ * {@link Document}.
+ *
+ * @param source
* the source {@link URL}
*
- * @return the canonical form of this {@link URL}
+ * @return the parsed {@link Document}
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ protected Document loadDocument(URL source) throws IOException {
+     // The canonical form of the source is handed to the parser as the
+     // base URI; the content itself is still opened from 'source'.
+     String url = getCanonicalUrl(source).toString();
+
+     // NOTE(review): DataUtil.load is not guaranteed to close the stream it
+     // is given, so release it here once the document has been built.
+     // Plain try/finally is used because the project targets Java 1.6+.
+     InputStream in = Instance.getCache().open(source, this, false);
+     try {
+         return DataUtil.load(in, "UTF-8", url);
+     } finally {
+         if (in != null) {
+             in.close();
+         }
+     }
+ }
+
+ /**
+ * Log into the support (can be a no-op depending upon the support).
*
* @throws IOException
* in case of I/O error
*/
@SuppressWarnings("unused")
- public URL getCanonicalUrl(URL source) throws IOException {
- return source;
+ protected void login() throws IOException {
+ }
+
+ /**
+ * Prepare the support if needed before processing.
+ *
+ * @throws IOException
+ * on I/O error
+ */
+ @SuppressWarnings("unused")
+ protected void preprocess() throws IOException {
+ }
+
+ /**
+ * Now that we have processed the {@link Story}, close the resources if any.
+ */
+ protected void close() {
+ setCurrentReferer(null);
}
/**
* Process the given story resource into a partially filled {@link Story}
* object containing the name and metadata, except for the description.
*
- * @param url
- * the story resource
- *
* @return the {@link Story}
*
* @throws IOException
* in case of I/O error
*/
- public Story processMeta(URL url) throws IOException {
- return processMeta(url, true, false, null);
+ public Story processMeta() throws IOException {
+ Story story = null;
+
+ preprocess();
+ try {
+ story = processMeta(false, null);
+ } finally {
+ close();
+ }
+
+ return story;
}
/**
* Process the given story resource into a partially filled {@link Story}
* object containing the name and metadata.
*
- * @param url
- * the story resource
- * @param close
- * close "this" and "in" when done
* @param getDesc
* retrieve the description of the story, or not
* @param pg
* @throws IOException
* in case of I/O error
*/
- protected Story processMeta(URL url, boolean close, boolean getDesc,
- Progress pg) throws IOException {
+ protected Story processMeta(boolean getDesc, Progress pg)
+ throws IOException {
if (pg == null) {
pg = new Progress();
} else {
pg.setMinMax(0, 100);
}
- login();
- pg.setProgress(10);
-
- url = getCanonicalUrl(url);
-
- setCurrentReferer(url);
-
- in = openInput(url); // NULL allowed here
- try {
- preprocess(url, getInput());
- pg.setProgress(30);
-
- Story story = new Story();
- MetaData meta = getMeta(url, getInput());
- if (meta.getCreationDate() == null
- || meta.getCreationDate().isEmpty()) {
- meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
- }
- story.setMeta(meta);
-
- pg.setProgress(50);
+ pg.setProgress(30);
- if (meta.getCover() == null) {
- meta.setCover(getDefaultCover(meta.getSubject()));
- }
-
- pg.setProgress(60);
+ Story story = new Story();
+ MetaData meta = getMeta();
+ if (meta.getCreationDate() == null || meta.getCreationDate().isEmpty()) {
+ meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
+ }
+ story.setMeta(meta);
- if (getDesc) {
- String descChapterName = Instance.getTrans().getString(
- StringId.DESCRIPTION);
- story.getMeta().setResume(
- makeChapter(url, 0, descChapterName,
- getDesc(url, getInput()), null));
- }
+ pg.setProgress(50);
- pg.setProgress(100);
- return story;
- } finally {
- if (close) {
- try {
- close();
- } catch (IOException e) {
- Instance.getTraceHandler().error(e);
- }
+ if (meta.getCover() == null) {
+ meta.setCover(BasicSupportHelper.getDefaultCover(meta.getSubject()));
+ }
- if (in != null) {
- in.close();
- }
- }
+ pg.setProgress(60);
- setCurrentReferer(null);
+ if (getDesc) {
+ String descChapterName = Instance.getTrans().getString(
+ StringId.DESCRIPTION);
+ story.getMeta().setResume(
+ BasicSupportPara.makeChapter(this, source, 0,
+ descChapterName, //
+ getDesc(), isHtml(), null));
}
+
+ pg.setProgress(100);
+ return story;
}
/**
* Process the given story resource into a fully filled {@link Story}
* object.
*
- * @param url
- * the story resource
* @param pg
* the optional progress reporter
*
* @throws IOException
* in case of I/O error
*/
- public Story process(URL url, Progress pg) throws IOException {
+ public Story process(Progress pg) throws IOException {
if (pg == null) {
pg = new Progress();
} else {
pg.setMinMax(0, 100);
}
- url = getCanonicalUrl(url);
+ setCurrentReferer(source);
+ login();
+ sourceNode = loadDocument(source);
+
pg.setProgress(1);
try {
Progress pgMeta = new Progress();
pg.addProgress(pgMeta, 10);
- Story story = processMeta(url, false, true, pgMeta);
+ preprocess();
+ Story story = processMeta(true, pgMeta);
if (!pgMeta.isDone()) {
pgMeta.setProgress(pgMeta.getMax()); // 10%
}
pg.setName("Retrieving " + story.getMeta().getTitle());
- setCurrentReferer(url);
-
Progress pgGetChapters = new Progress();
pg.addProgress(pgGetChapters, 10);
story.setChapters(new ArrayList<Chapter>());
- List<Entry<String, URL>> chapters = getChapters(url, getInput(),
- pgGetChapters);
+ List<Entry<String, URL>> chapters = getChapters(pgGetChapters);
if (!pgGetChapters.isDone()) {
pgGetChapters.setProgress(pgGetChapters.getMax()); // 20%
}
int i = 1;
for (Entry<String, URL> chap : chapters) {
pgChaps.setName("Extracting chapter " + i);
- InputStream chapIn = null;
- if (chap.getValue() != null) {
- setCurrentReferer(chap.getValue());
- chapIn = Instance.getCache().open(chap.getValue(),
- this, false);
+ URL chapUrl = chap.getValue();
+ String chapName = chap.getKey();
+ if (chapUrl != null) {
+ setCurrentReferer(chapUrl);
}
- pgChaps.setProgress(i * 100);
- try {
- Progress pgGetChapterContent = new Progress();
- Progress pgMakeChapter = new Progress();
- pgChaps.addProgress(pgGetChapterContent, 100);
- pgChaps.addProgress(pgMakeChapter, 100);
-
- String content = getChapterContent(url, chapIn, i,
- pgGetChapterContent);
- if (!pgGetChapterContent.isDone()) {
- pgGetChapterContent.setProgress(pgGetChapterContent
- .getMax());
- }
- Chapter cc = makeChapter(url, i, chap.getKey(),
- content, pgMakeChapter);
- if (!pgMakeChapter.isDone()) {
- pgMakeChapter.setProgress(pgMakeChapter.getMax());
- }
+ pgChaps.setProgress(i * 100);
+ Progress pgGetChapterContent = new Progress();
+ Progress pgMakeChapter = new Progress();
+ pgChaps.addProgress(pgGetChapterContent, 100);
+ pgChaps.addProgress(pgMakeChapter, 100);
+
+ String content = getChapterContent(chapUrl, i,
+ pgGetChapterContent);
+ if (!pgGetChapterContent.isDone()) {
+ pgGetChapterContent.setProgress(pgGetChapterContent
+ .getMax());
+ }
- words += cc.getWords();
- story.getChapters().add(cc);
- story.getMeta().setWords(words);
- } finally {
- if (chapIn != null) {
- chapIn.close();
- }
+ Chapter cc = BasicSupportPara.makeChapter(this, chapUrl, i,
+ chapName, content, isHtml(), pgMakeChapter);
+ if (!pgMakeChapter.isDone()) {
+ pgMakeChapter.setProgress(pgMakeChapter.getMax());
}
+ words += cc.getWords();
+ story.getChapters().add(cc);
+ story.getMeta().setWords(words);
+
i++;
}
}
return story;
-
} finally {
- try {
- close();
- } catch (IOException e) {
- Instance.getTraceHandler().error(e);
- }
-
- if (in != null) {
- in.close();
- }
-
- setCurrentReferer(null);
+ close();
}
}
/**
- * The support type.
+ * Return a {@link BasicSupport} implementation supporting the given
+ * resource if possible.
*
- * @return the type
- */
- public SupportType getType() {
- return type;
- }
-
- /**
- * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
- * the current {@link URL} we work on.
+ * @param url
+ * the story resource
*
- * @return the referer
+ * @return an implementation that supports it, or NULL
*/
- public URL getCurrentReferer() {
- return currentReferer;
- }
+ public static BasicSupport getSupport(URL url) {
+ if (url == null) {
+ return null;
+ }
- /**
- * The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
- * the current {@link URL} we work on.
- *
- * @param currentReferer
- * the new referer
- */
- protected void setCurrentReferer(URL currentReferer) {
- this.currentReferer = currentReferer;
- }
+ // TEXT and INFO_TEXT always support files (not URLs though)
+ for (SupportType type : SupportType.values()) {
+ if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
+ BasicSupport support = getSupport(type, url);
+ if (support != null && support.supports(url)) {
+ return support;
+ }
+ }
+ }
- /**
- * The support type.
- *
- * @param type
- * the new type
- *
- * @return this
- */
- protected BasicSupport setType(SupportType type) {
- this.type = type;
- return this;
+ for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
+ SupportType.TEXT }) {
+ BasicSupport support = getSupport(type, url);
+ if (support != null && support.supports(url)) {
+ return support;
+ }
+ }
+
+ return null;
}
/**
- * Prepare the support if needed before processing.
+ * Return a {@link BasicSupport} implementation supporting the given type.
*
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
+ * @param type
+ * the type
+ * @param url
+ * the {@link URL} to support (can be NULL to get an
+ * "abstract support")
*
- * @throws IOException
- * on I/O error
+ * @return an implementation that supports it, or NULL
*/
- @SuppressWarnings("unused")
- protected void preprocess(URL source, InputStream in) throws IOException {
- }
+ public static BasicSupport getSupport(SupportType type, URL url) {
+ BasicSupport support = null;
- /**
- * Now that we have processed the {@link Story}, close the resources if any.
- *
- * @throws IOException
- * on I/O error
- */
- @SuppressWarnings("unused")
- protected void close() throws IOException {
- }
-
- /**
- * Create a {@link Chapter} object from the given information, formatting
- * the content as it should be.
- *
- * @param source
- * the source of the story
- * @param number
- * the chapter number
- * @param name
- * the chapter name
- * @param content
- * the chapter content
- * @param pg
- * the optional progress reporter
- *
- * @return the {@link Chapter}
- *
- * @throws IOException
- * in case of I/O error
- */
- protected Chapter makeChapter(URL source, int number, String name,
- String content, Progress pg) throws IOException {
- // Chapter name: process it correctly, then remove the possible
- // redundant "Chapter x: " in front of it, or "-" (as in
- // "Chapter 5: - Fun!" after the ": " was automatically added)
- String chapterName = processPara(name).getContent().trim();
- for (String lang : Instance.getConfig().getString(Config.CHAPTER)
- .split(",")) {
- String chapterWord = Instance.getConfig().getStringX(
- Config.CHAPTER, lang);
- if (chapterName.startsWith(chapterWord)) {
- chapterName = chapterName.substring(chapterWord.length())
- .trim();
- break;
- }
- }
-
- if (chapterName.startsWith(Integer.toString(number))) {
- chapterName = chapterName.substring(
- Integer.toString(number).length()).trim();
- }
-
- while (chapterName.startsWith(":") || chapterName.startsWith("-")) {
- chapterName = chapterName.substring(1).trim();
- }
- //
-
- Chapter chap = new Chapter(number, chapterName);
-
- if (content != null) {
- List<Paragraph> paras = makeParagraphs(source, content, pg);
- long words = 0;
- for (Paragraph para : paras) {
- words += para.getWords();
- }
- chap.setParagraphs(paras);
- chap.setWords(words);
- }
-
- return chap;
-
- }
-
- /**
- * Convert the given content into {@link Paragraph}s.
- *
- * @param source
- * the source URL of the story
- * @param content
- * the textual content
- * @param pg
- * the optional progress reporter
- *
- * @return the {@link Paragraph}s
- *
- * @throws IOException
- * in case of I/O error
- */
- protected List<Paragraph> makeParagraphs(URL source, String content,
- Progress pg) throws IOException {
- if (pg == null) {
- pg = new Progress();
- }
-
- if (isHtml()) {
- // Special <HR> processing:
- content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
- "<br/>* * *<br/>");
- }
-
- List<Paragraph> paras = new ArrayList<Paragraph>();
-
- if (content != null && !content.trim().isEmpty()) {
- if (isHtml()) {
- String[] tab = content.split("(<p>|</p>|<br>|<br/>)");
- pg.setMinMax(0, tab.length);
- int i = 1;
- for (String line : tab) {
- if (line.startsWith("[") && line.endsWith("]")) {
- pg.setName("Extracting image " + i);
- }
- paras.add(makeParagraph(source, line.trim()));
- pg.setProgress(i++);
- }
- pg.setName(null);
- } else {
- List<String> lines = new ArrayList<String>();
- BufferedReader buff = null;
- try {
- buff = new BufferedReader(
- new InputStreamReader(new ByteArrayInputStream(
- content.getBytes("UTF-8")), "UTF-8"));
- for (String line = buff.readLine(); line != null; line = buff
- .readLine()) {
- lines.add(line.trim());
- }
- } finally {
- if (buff != null) {
- buff.close();
- }
- }
-
- pg.setMinMax(0, lines.size());
- int i = 0;
- for (String line : lines) {
- if (line.startsWith("[") && line.endsWith("]")) {
- pg.setName("Extracting image " + i);
- }
- paras.add(makeParagraph(source, line));
- pg.setProgress(i++);
- }
- pg.setName(null);
- }
-
- // Check quotes for "bad" format
- List<Paragraph> newParas = new ArrayList<Paragraph>();
- for (Paragraph para : paras) {
- newParas.addAll(requotify(para));
- }
- paras = newParas;
-
- // Remove double blanks/brks
- fixBlanksBreaks(paras);
- }
-
- return paras;
- }
-
- /**
- * Convert the given line into a single {@link Paragraph}.
- *
- * @param source
- * the source URL of the story
- * @param line
- * the textual content of the paragraph
- *
- * @return the {@link Paragraph}
- */
- private Paragraph makeParagraph(URL source, String line) {
- Image image = null;
- if (line.startsWith("[") && line.endsWith("]")) {
- image = getImage(this, source, line.substring(1, line.length() - 1)
- .trim());
- }
-
- if (image != null) {
- return new Paragraph(image);
- }
-
- return processPara(line);
- }
-
- /**
- * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
- * those {@link Paragraph}s.
- * <p>
- * The resulting list will not contain a starting or trailing blank/break
- * nor 2 blanks or breaks following each other.
- *
- * @param paras
- * the list of {@link Paragraph}s to fix
- */
- protected void fixBlanksBreaks(List<Paragraph> paras) {
- boolean space = false;
- boolean brk = true;
- for (int i = 0; i < paras.size(); i++) {
- Paragraph para = paras.get(i);
- boolean thisSpace = para.getType() == ParagraphType.BLANK;
- boolean thisBrk = para.getType() == ParagraphType.BREAK;
-
- if (i > 0 && space && thisBrk) {
- paras.remove(i - 1);
- i--;
- } else if ((space || brk) && (thisSpace || thisBrk)) {
- paras.remove(i);
- i--;
- }
-
- space = thisSpace;
- brk = thisBrk;
- }
-
- // Remove blank/brk at start
- if (paras.size() > 0
- && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
- 0).getType() == ParagraphType.BREAK)) {
- paras.remove(0);
- }
-
- // Remove blank/brk at end
- int last = paras.size() - 1;
- if (paras.size() > 0
- && (paras.get(last).getType() == ParagraphType.BLANK || paras
- .get(last).getType() == ParagraphType.BREAK)) {
- paras.remove(last);
- }
- }
-
- /**
- * Get the default cover related to this subject (see <tt>.info</tt> files).
- *
- * @param subject
- * the subject
- *
- * @return the cover if any, or NULL
- */
- static Image getDefaultCover(String subject) {
- if (subject != null && !subject.isEmpty()
- && Instance.getCoverDir() != null) {
- try {
- File fileCover = new File(Instance.getCoverDir(), subject);
- return getImage(null, fileCover.toURI().toURL(), subject);
- } catch (MalformedURLException e) {
- }
- }
-
- return null;
- }
-
- /**
- * Return the list of supported image extensions.
- *
- * @param emptyAllowed
- * TRUE to allow an empty extension on first place, which can be
- * used when you may already have an extension in your input but
- * are not sure about it
- *
- * @return the extensions
- */
- static String[] getImageExt(boolean emptyAllowed) {
- if (emptyAllowed) {
- return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
- }
-
- return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
- }
-
- /**
- * Check if the given resource can be a local image or a remote image, then
- * refresh the cache with it if it is.
- *
- * @param source
- * the story source
- * @param line
- * the resource to check
- *
- * @return the image if found, or NULL
- *
- */
- static Image getImage(BasicSupport support, URL source, String line) {
- URL url = getImageUrl(support, source, line);
- if (url != null) {
- if ("file".equals(url.getProtocol())) {
- if (new File(url.getPath()).isDirectory()) {
- return null;
- }
- }
- InputStream in = null;
- try {
- in = Instance.getCache().open(url, getSupport(url), true);
- return new Image(in);
- } catch (IOException e) {
- } finally {
- if (in != null) {
- try {
- in.close();
- } catch (IOException e) {
- }
- }
- }
- }
-
- return null;
- }
-
- /**
- * Check if the given resource can be a local image or a remote image, then
- * refresh the cache with it if it is.
- *
- * @param source
- * the story source
- * @param line
- * the resource to check
- *
- * @return the image URL if found, or NULL
- *
- */
- static URL getImageUrl(BasicSupport support, URL source, String line) {
- URL url = null;
-
- if (line != null) {
- // try for files
- if (source != null) {
- try {
-
- String relPath = null;
- String absPath = null;
- try {
- String path = new File(source.getFile()).getParent();
- relPath = new File(new File(path), line.trim())
- .getAbsolutePath();
- } catch (Exception e) {
- // Cannot be converted to path (one possibility to take
- // into account: absolute path on Windows)
- }
- try {
- absPath = new File(line.trim()).getAbsolutePath();
- } catch (Exception e) {
- // Cannot be converted to path (at all)
- }
-
- for (String ext : getImageExt(true)) {
- File absFile = new File(absPath + ext);
- File relFile = new File(relPath + ext);
- if (absPath != null && absFile.exists()
- && absFile.isFile()) {
- url = absFile.toURI().toURL();
- } else if (relPath != null && relFile.exists()
- && relFile.isFile()) {
- url = relFile.toURI().toURL();
- }
- }
- } catch (Exception e) {
- // Should not happen since we control the correct arguments
- }
- }
-
- if (url == null) {
- // try for URLs
- try {
- for (String ext : getImageExt(true)) {
- if (Instance.getCache()
- .check(new URL(line + ext), true)) {
- url = new URL(line + ext);
- break;
- }
- }
-
- // try out of cache
- if (url == null) {
- for (String ext : getImageExt(true)) {
- try {
- url = new URL(line + ext);
- Instance.getCache().refresh(url, support, true);
- break;
- } catch (IOException e) {
- // no image with this ext
- url = null;
- }
- }
- }
- } catch (MalformedURLException e) {
- // Not an url
- }
- }
-
- // refresh the cached file
- if (url != null) {
- try {
- Instance.getCache().refresh(url, support, true);
- } catch (IOException e) {
- // woops, broken image
- url = null;
- }
- }
- }
-
- return url;
- }
-
- /**
- * Open the input file that will be used through the support.
- * <p>
- * Can return NULL, in which case you are supposed to work without an
- * {@link InputStream}.
- *
- * @param source
- * the source {@link URL}
- *
- * @return the {@link InputStream}
- *
- * @throws IOException
- * in case of I/O error
- */
- protected InputStream openInput(URL source) throws IOException {
- return Instance.getCache().open(source, this, false);
- }
-
- /**
- * Reset then return {@link BasicSupport#in}.
- *
- * @return {@link BasicSupport#in}
- */
- protected InputStream getInput() {
- return reset(in);
- }
-
- /**
- * Fix the author name if it is prefixed with some "by" {@link String}.
- *
- * @param author
- * the author with a possible prefix
- *
- * @return the author without prefixes
- */
- protected String fixAuthor(String author) {
- if (author != null) {
- for (String suffix : new String[] { " ", ":" }) {
- for (String byString : Instance.getConfig()
- .getString(Config.BYS).split(",")) {
- byString += suffix;
- if (author.toUpperCase().startsWith(byString.toUpperCase())) {
- author = author.substring(byString.length()).trim();
- }
- }
- }
-
- // Special case (without suffix):
- if (author.startsWith("©")) {
- author = author.substring(1);
- }
- }
-
- return author;
- }
-
- /**
- * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
- * and requotify them (i.e., separate them into QUOTE paragraphs and other
- * paragraphs (quotes or not)).
- *
- * @param para
- * the paragraph to requotify (not necessarily a quote)
- *
- * @return the correctly (or so we hope) quotified paragraphs
- */
- protected List<Paragraph> requotify(Paragraph para) {
- List<Paragraph> newParas = new ArrayList<Paragraph>();
-
- if (para.getType() == ParagraphType.QUOTE
- && para.getContent().length() > 2) {
- String line = para.getContent();
- boolean singleQ = line.startsWith("" + openQuote);
- boolean doubleQ = line.startsWith("" + openDoubleQuote);
-
- // Do not try when more than one quote at a time
- // (some stories are not easily readable if we do)
- if (singleQ
- && line.indexOf(closeQuote, 1) < line
- .lastIndexOf(closeQuote)) {
- newParas.add(para);
- return newParas;
- }
- if (doubleQ
- && line.indexOf(closeDoubleQuote, 1) < line
- .lastIndexOf(closeDoubleQuote)) {
- newParas.add(para);
- return newParas;
- }
- //
-
- if (!singleQ && !doubleQ) {
- line = openDoubleQuote + line + closeDoubleQuote;
- newParas.add(new Paragraph(ParagraphType.QUOTE, line, para
- .getWords()));
- } else {
- char open = singleQ ? openQuote : openDoubleQuote;
- char close = singleQ ? closeQuote : closeDoubleQuote;
-
- int posDot = -1;
- boolean inQuote = false;
- int i = 0;
- for (char car : line.toCharArray()) {
- if (car == open) {
- inQuote = true;
- } else if (car == close) {
- inQuote = false;
- } else if (car == '.' && !inQuote) {
- posDot = i;
- break;
- }
- i++;
- }
-
- if (posDot >= 0) {
- String rest = line.substring(posDot + 1).trim();
- line = line.substring(0, posDot + 1).trim();
- long words = 1;
- for (char car : line.toCharArray()) {
- if (car == ' ') {
- words++;
- }
- }
- newParas.add(new Paragraph(ParagraphType.QUOTE, line, words));
- if (!rest.isEmpty()) {
- newParas.addAll(requotify(processPara(rest)));
- }
- } else {
- newParas.add(para);
- }
- }
- } else {
- newParas.add(para);
- }
-
- return newParas;
- }
-
- /**
- * Process a {@link Paragraph} from a raw line of text.
- * <p>
- * Will also fix quotes and HTML encoding if needed.
- *
- * @param line
- * the raw line
- *
- * @return the processed {@link Paragraph}
- */
- protected Paragraph processPara(String line) {
- line = ifUnhtml(line).trim();
-
- boolean space = true;
- boolean brk = true;
- boolean quote = false;
- boolean tentativeCloseQuote = false;
- char prev = '\0';
- int dashCount = 0;
- long words = 1;
-
- StringBuilder builder = new StringBuilder();
- for (char car : line.toCharArray()) {
- if (car != '-') {
- if (dashCount > 0) {
- // dash, ndash and mdash: - – —
- // currently: always use mdash
- builder.append(dashCount == 1 ? '-' : '—');
- }
- dashCount = 0;
- }
-
- if (tentativeCloseQuote) {
- tentativeCloseQuote = false;
- if (Character.isLetterOrDigit(car)) {
- builder.append("'");
- } else {
- // handle double-single quotes as double quotes
- if (prev == car) {
- builder.append(closeDoubleQuote);
- continue;
- }
-
- builder.append(closeQuote);
- }
- }
-
- switch (car) {
- case ' ': // note: unbreakable space
- case ' ':
- case '\t':
- case '\n': // just in case
- case '\r': // just in case
- if (builder.length() > 0
- && builder.charAt(builder.length() - 1) != ' ') {
- words++;
- }
- builder.append(' ');
- break;
-
- case '\'':
- if (space || (brk && quote)) {
- quote = true;
- // handle double-single quotes as double quotes
- if (prev == car) {
- builder.deleteCharAt(builder.length() - 1);
- builder.append(openDoubleQuote);
- } else {
- builder.append(openQuote);
- }
- } else if (prev == ' ' || prev == car) {
- // handle double-single quotes as double quotes
- if (prev == car) {
- builder.deleteCharAt(builder.length() - 1);
- builder.append(openDoubleQuote);
- } else {
- builder.append(openQuote);
- }
- } else {
- // it is a quote ("I'm off") or a 'quote' ("This
- // 'good' restaurant"...)
- tentativeCloseQuote = true;
- }
- break;
-
- case '"':
- if (space || (brk && quote)) {
- quote = true;
- builder.append(openDoubleQuote);
- } else if (prev == ' ') {
- builder.append(openDoubleQuote);
- } else {
- builder.append(closeDoubleQuote);
- }
- break;
-
- case '-':
- if (space) {
- quote = true;
- } else {
- dashCount++;
- }
- space = false;
- break;
-
- case '*':
- case '~':
- case '/':
- case '\\':
- case '<':
- case '>':
- case '=':
- case '+':
- case '_':
- case '–':
- case '—':
- space = false;
- builder.append(car);
- break;
-
- case '‘':
- case '`':
- case '‹':
- case '﹁':
- case '〈':
- case '「':
- if (space || (brk && quote)) {
- quote = true;
- builder.append(openQuote);
- } else {
- // handle double-single quotes as double quotes
- if (prev == car) {
- builder.deleteCharAt(builder.length() - 1);
- builder.append(openDoubleQuote);
- } else {
- builder.append(openQuote);
- }
- }
- space = false;
- brk = false;
- break;
-
- case '’':
- case '›':
- case '﹂':
- case '〉':
- case '」':
- space = false;
- brk = false;
- // handle double-single quotes as double quotes
- if (prev == car) {
- builder.deleteCharAt(builder.length() - 1);
- builder.append(closeDoubleQuote);
- } else {
- builder.append(closeQuote);
- }
- break;
-
- case '«':
- case '“':
- case '﹃':
- case '《':
- case '『':
- if (space || (brk && quote)) {
- quote = true;
- builder.append(openDoubleQuote);
- } else {
- builder.append(openDoubleQuote);
- }
- space = false;
- brk = false;
- break;
-
- case '»':
- case '”':
- case '﹄':
- case '》':
- case '』':
- space = false;
- brk = false;
- builder.append(closeDoubleQuote);
- break;
-
- default:
- space = false;
- brk = false;
- builder.append(car);
- break;
- }
-
- prev = car;
- }
-
- if (tentativeCloseQuote) {
- tentativeCloseQuote = false;
- builder.append(closeQuote);
- }
-
- line = builder.toString().trim();
-
- ParagraphType type = ParagraphType.NORMAL;
- if (space) {
- type = ParagraphType.BLANK;
- } else if (brk) {
- type = ParagraphType.BREAK;
- } else if (quote) {
- type = ParagraphType.QUOTE;
- }
-
- return new Paragraph(type, line, words);
- }
-
- /**
- * Remove the HTML from the input <b>if</b> {@link BasicSupport#isHtml()} is
- * true.
- *
- * @param input
- * the input
- *
- * @return the no html version if needed
- */
- private String ifUnhtml(String input) {
- if (isHtml() && input != null) {
- return StringUtils.unhtml(input);
- }
-
- return input;
- }
-
- /**
- * Return a {@link BasicSupport} implementation supporting the given
- * resource if possible.
- *
- * @param url
- * the story resource
- *
- * @return an implementation that supports it, or NULL
- */
- public static BasicSupport getSupport(URL url) {
- if (url == null) {
- return null;
- }
-
- // TEXT and INFO_TEXT always support files (not URLs though)
- for (SupportType type : SupportType.values()) {
- if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
- BasicSupport support = getSupport(type);
- if (support != null && support.supports(url)) {
- return support;
- }
- }
- }
-
- for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
- SupportType.TEXT }) {
- BasicSupport support = getSupport(type);
- if (support != null && support.supports(url)) {
- return support;
- }
- }
-
- return null;
- }
-
- /**
- * Return a {@link BasicSupport} implementation supporting the given type.
- *
- * @param type
- * the type
- *
- * @return an implementation that supports it, or NULL
- */
- public static BasicSupport getSupport(SupportType type) {
switch (type) {
case EPUB:
- return new Epub().setType(type);
+ support = new Epub();
+ break;
case INFO_TEXT:
- return new InfoText().setType(type);
+ support = new InfoText();
+ break;
case FIMFICTION:
try {
// Can fail if no client key or NO in options
- return new FimfictionApi().setType(type);
+ support = new FimfictionApi();
} catch (IOException e) {
- return new Fimfiction().setType(type);
+ support = new Fimfiction();
}
+ break;
case FANFICTION:
- return new Fanfiction().setType(type);
+ support = new Fanfiction();
+ break;
case TEXT:
- return new Text().setType(type);
+ support = new Text();
+ break;
case MANGAFOX:
- return new MangaFox().setType(type);
+ support = new MangaFox();
+ break;
case E621:
- return new E621().setType(type);
+ support = new E621();
+ break;
case YIFFSTAR:
- return new YiffStar().setType(type);
+ support = new YiffStar();
+ break;
case E_HENTAI:
- return new EHentai().setType(type);
+ support = new EHentai();
+ break;
case CBZ:
- return new Cbz().setType(type);
+ support = new Cbz();
+ break;
case HTML:
- return new Html().setType(type);
- }
-
- return null;
- }
-
- /**
- * Reset the given {@link InputStream} and return it.
- *
- * @param in
- * the {@link InputStream} to reset
- *
- * @return the same {@link InputStream} after reset
- */
- static protected InputStream reset(InputStream in) {
- try {
- if (in != null) {
- in.reset();
- }
- } catch (IOException e) {
- }
-
- return in;
- }
-
- /**
- * Return the first line from the given input which correspond to the given
- * selectors.
- *
- * @param in
- * the input
- * @param needle
- * a string that must be found inside the target line (also
- * supports "^" at start to say "only if it starts with" the
- * needle)
- * @param relativeLine
- * the line to return based upon the target line position (-1 =
- * the line before, 0 = the target line...)
- *
- * @return the line
- */
- static protected String getLine(InputStream in, String needle,
- int relativeLine) {
- return getLine(in, needle, relativeLine, true);
- }
-
- /**
- * Return a line from the given input which correspond to the given
- * selectors.
- *
- * @param in
- * the input
- * @param needle
- * a string that must be found inside the target line (also
- * supports "^" at start to say "only if it starts with" the
- * needle)
- * @param relativeLine
- * the line to return based upon the target line position (-1 =
- * the line before, 0 = the target line...)
- * @param first
- * takes the first result (as opposed to the last one, which will
- * also always spend the input)
- *
- * @return the line
- */
- static protected String getLine(InputStream in, String needle,
- int relativeLine, boolean first) {
- String rep = null;
-
- reset(in);
-
- List<String> lines = new ArrayList<String>();
- @SuppressWarnings("resource")
- Scanner scan = new Scanner(in, "UTF-8");
- int index = -1;
- scan.useDelimiter("\\n");
- while (scan.hasNext()) {
- lines.add(scan.next());
-
- if (index == -1) {
- if (needle.startsWith("^")) {
- if (lines.get(lines.size() - 1).startsWith(
- needle.substring(1))) {
- index = lines.size() - 1;
- }
-
- } else {
- if (lines.get(lines.size() - 1).contains(needle)) {
- index = lines.size() - 1;
- }
- }
- }
-
- if (index >= 0 && index + relativeLine < lines.size()) {
- rep = lines.get(index + relativeLine);
- if (first) {
- break;
- }
- }
+ support = new Html();
+ break;
}
- return rep;
- }
-
- /**
- * Return the text between the key and the endKey (and optional subKey can
- * be passed, in this case we will look for the key first, then take the
- * text between the subKey and the endKey).
- * <p>
- * Will only match the first line with the given key if more than one are
- * possible. Which also means that if the subKey or endKey is not found on
- * that line, NULL will be returned.
- *
- * @param in
- * the input
- * @param key
- * the key to match (also supports "^" at start to say
- * "only if it starts with" the key)
- * @param subKey
- * the sub key or NULL if none
- * @param endKey
- * the end key or NULL for "up to the end"
- * @return the text or NULL if not found
- */
- static protected String getKeyLine(InputStream in, String key,
- String subKey, String endKey) {
- return getKeyText(getLine(in, key, 0), key, subKey, endKey);
- }
-
- /**
- * Return the text between the key and the endKey (and optional subKey can
- * be passed, in this case we will look for the key first, then take the
- * text between the subKey and the endKey).
- *
- * @param in
- * the input
- * @param key
- * the key to match (also supports "^" at start to say
- * "only if it starts with" the key)
- * @param subKey
- * the sub key or NULL if none
- * @param endKey
- * the end key or NULL for "up to the end"
- * @return the text or NULL if not found
- */
- static protected String getKeyText(String in, String key, String subKey,
- String endKey) {
- String result = null;
-
- String line = in;
- if (line != null && line.contains(key)) {
- line = line.substring(line.indexOf(key) + key.length());
- if (subKey == null || subKey.isEmpty() || line.contains(subKey)) {
- if (subKey != null) {
- line = line.substring(line.indexOf(subKey)
- + subKey.length());
- }
- if (endKey == null || line.contains(endKey)) {
- if (endKey != null) {
- line = line.substring(0, line.indexOf(endKey));
- result = line;
- }
- }
- }
- }
-
- return result;
- }
-
- /**
- * Return the text between the key and the endKey (optional subKeys can be
- * passed, in this case we will look for the subKeys first, then take the
- * text between the key and the endKey).
- *
- * @param in
- * the input
- * @param key
- * the key to match
- * @param endKey
- * the end key or NULL for "up to the end"
- * @param afters
- * the sub-keys to find before checking for key/endKey
- *
- * @return the text or NULL if not found
- */
- static protected String getKeyTextAfter(String in, String key,
- String endKey, String... afters) {
-
- if (in != null && !in.isEmpty()) {
- int pos = indexOfAfter(in, 0, afters);
- if (pos < 0) {
- return null;
- }
-
- in = in.substring(pos);
- }
-
- return getKeyText(in, key, null, endKey);
- }
-
- /**
- * Return the first index after all the given "afters" have been found in
- * the {@link String}, or -1 if it was not possible.
- *
- * @param in
- * the input
- * @param startAt
- * start at this position in the string
- * @param afters
- * the sub-keys to find before checking for key/endKey
- *
- * @return the text or NULL if not found
- */
- static protected int indexOfAfter(String in, int startAt, String... afters) {
- int pos = -1;
- if (in != null && !in.isEmpty()) {
- pos = startAt;
- if (afters != null) {
- for (int i = 0; pos >= 0 && i < afters.length; i++) {
- String subKey = afters[i];
- if (!subKey.isEmpty()) {
- pos = in.indexOf(subKey, pos);
- if (pos >= 0) {
- pos += subKey.length();
- }
- }
- }
- }
+ if (support != null) {
+ support.setType(type);
+ support.source = support.getCanonicalUrl(url);
}
- return pos;
+ return support;
}
}
--- /dev/null
+package be.nikiroo.fanfix.supported;
+
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.MalformedURLException;
+import java.net.URL;
+
+import be.nikiroo.fanfix.Instance;
+import be.nikiroo.fanfix.bundles.Config;
+import be.nikiroo.utils.Image;
+
+/**
+ * Helper class for {@link BasicSupport}, mostly dedicated to text formatting
+ * for the classes that implement {@link BasicSupport}.
+ *
+ * @author niki
+ */
+class BasicSupportHelper {
+	/**
+	 * Look for the default cover of a subject (see <tt>.info</tt> files).
+	 * <p>
+	 * The cover is searched under the name of the subject inside the
+	 * directory returned by {@link Instance#getCoverDir()}.
+	 *
+	 * @param subject
+	 *            the subject (NULL or empty means "no cover")
+	 *
+	 * @return the cover if any, or NULL
+	 */
+	public static Image getDefaultCover(String subject) {
+		if (subject == null || subject.isEmpty()) {
+			return null;
+		}
+
+		if (Instance.getCoverDir() == null) {
+			return null;
+		}
+
+		Image cover = null;
+		try {
+			URL location = new File(Instance.getCoverDir(), subject).toURI()
+					.toURL();
+			cover = getImage(null, location, subject);
+		} catch (MalformedURLException e) {
+			// the location cannot be expressed as an URL: no default cover
+		}
+
+		return cover;
+	}
+
+ /**
+ * Return the list of supported image extensions.
+ *
+ * @param emptyAllowed
+ * TRUE to allow an empty extension on first place, which can be
+ * used when you may already have an extension in your input but
+ * are not sure about it
+ *
+ * @return the extensions
+ */
+ public static String[] getImageExt(boolean emptyAllowed) {
+ if (emptyAllowed) {
+ return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
+ }
+
+ return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
+ }
+
+ /**
+ * Check if the given resource can be a local image or a remote image, then
+ * refresh the cache with it if it is.
+ *
+ * @param support
+ * the linked {@link BasicSupport}
+ * @param source
+ * the story source
+ * @param line
+ * the resource to check
+ *
+ * @return the image if found, or NULL
+ *
+ */
+ public static Image getImage(BasicSupport support, URL source, String line) {
+ URL url = getImageUrl(support, source, line);
+ if (url != null) {
+ if ("file".equals(url.getProtocol())) {
+ if (new File(url.getPath()).isDirectory()) {
+ return null;
+ }
+ }
+ InputStream in = null;
+ try {
+ in = Instance.getCache().open(url,
+ BasicSupport.getSupport(url), true);
+ return new Image(in);
+ } catch (IOException e) {
+ } finally {
+ if (in != null) {
+ try {
+ in.close();
+ } catch (IOException e) {
+ }
+ }
+ }
+ }
+
+ return null;
+ }
+
+	/**
+	 * Check if the given resource can be a local image or a remote image, then
+	 * refresh the cache with it if it is.
+	 * <p>
+	 * Local files are tried first (as an absolute path, then relative to the
+	 * directory of the story source), then URLs. In both cases, the
+	 * extensions of {@link BasicSupportHelper#getImageExt(boolean)} are tried
+	 * in order and the <b>first</b> match is used, so a resource that already
+	 * carries its extension takes precedence.
+	 *
+	 * @param support
+	 *            the linked {@link BasicSupport}
+	 * @param source
+	 *            the story source (can be NULL, in which case local files are
+	 *            not tried)
+	 * @param line
+	 *            the resource to check (can be NULL)
+	 *
+	 * @return the image URL if found, or NULL
+	 */
+	public static URL getImageUrl(BasicSupport support, URL source, String line) {
+		if (line == null) {
+			return null;
+		}
+
+		URL url = null;
+
+		// try for files
+		if (source != null) {
+			try {
+				String relPath = null;
+				String absPath = null;
+				try {
+					String path = new File(source.getFile()).getParent();
+					relPath = new File(new File(path), line.trim())
+							.getAbsolutePath();
+				} catch (Exception e) {
+					// Cannot be converted to path (one possibility to take
+					// into account: absolute path on Windows)
+				}
+				try {
+					absPath = new File(line.trim()).getAbsolutePath();
+				} catch (Exception e) {
+					// Cannot be converted to path (at all)
+				}
+
+				for (String ext : getImageExt(true)) {
+					// Only build the candidates we have a path for
+					File absFile = absPath == null ? null : new File(absPath
+							+ ext);
+					File relFile = relPath == null ? null : new File(relPath
+							+ ext);
+
+					if (absFile != null && absFile.exists()
+							&& absFile.isFile()) {
+						url = absFile.toURI().toURL();
+					} else if (relFile != null && relFile.exists()
+							&& relFile.isFile()) {
+						url = relFile.toURI().toURL();
+					}
+
+					// Stop at the first match, so the extensions are tried
+					// in priority order (as for the URLs below)
+					if (url != null) {
+						break;
+					}
+				}
+			} catch (Exception e) {
+				// Should not happen since we control the correct arguments
+			}
+		}
+
+		if (url == null) {
+			// try for URLs
+			try {
+				for (String ext : getImageExt(true)) {
+					if (Instance.getCache().check(new URL(line + ext), true)) {
+						url = new URL(line + ext);
+						break;
+					}
+				}
+
+				// try out of cache
+				if (url == null) {
+					for (String ext : getImageExt(true)) {
+						try {
+							url = new URL(line + ext);
+							Instance.getCache().refresh(url, support, true);
+							break;
+						} catch (IOException e) {
+							// no image with this ext
+							url = null;
+						}
+					}
+				}
+			} catch (MalformedURLException e) {
+				// Not an url
+			}
+		}
+
+		// refresh the cached file
+		if (url != null) {
+			try {
+				Instance.getCache().refresh(url, support, true);
+			} catch (IOException e) {
+				// woops, broken image
+				url = null;
+			}
+		}
+
+		return url;
+	}
+
+ /**
+ * Fix the author name if it is prefixed with some "by" {@link String}.
+ *
+ * @param author
+ * the author with a possible prefix
+ *
+ * @return the author without prefixes
+ */
+ public static String fixAuthor(String author) {
+ if (author != null) {
+ for (String suffix : new String[] { " ", ":" }) {
+ for (String byString : Instance.getConfig()
+ .getString(Config.BYS).split(",")) {
+ byString += suffix;
+ if (author.toUpperCase().startsWith(byString.toUpperCase())) {
+ author = author.substring(byString.length()).trim();
+ }
+ }
+ }
+
+ // Special case (without suffix):
+ if (author.startsWith("©")) {
+ author = author.substring(1);
+ }
+ }
+
+ return author;
+ }
+}
--- /dev/null
+package be.nikiroo.fanfix.supported;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStreamReader;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+import be.nikiroo.fanfix.Instance;
+import be.nikiroo.fanfix.bundles.Config;
+import be.nikiroo.fanfix.bundles.StringId;
+import be.nikiroo.fanfix.data.Chapter;
+import be.nikiroo.fanfix.data.Paragraph;
+import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
+import be.nikiroo.utils.Image;
+import be.nikiroo.utils.Progress;
+import be.nikiroo.utils.StringUtils;
+
+/**
+ * Helper class for {@link BasicSupport}, mostly dedicated to {@link Paragraph}
+ * and text formatting for the {@link BasicSupport} class itself (not its
+ * children).
+ *
+ * @author niki
+ */
+class BasicSupportPara {
+ // quote chars
+ private static char openQuote = Instance.getTrans().getCharacter(
+ StringId.OPEN_SINGLE_QUOTE);
+ private static char closeQuote = Instance.getTrans().getCharacter(
+ StringId.CLOSE_SINGLE_QUOTE);
+ private static char openDoubleQuote = Instance.getTrans().getCharacter(
+ StringId.OPEN_DOUBLE_QUOTE);
+ private static char closeDoubleQuote = Instance.getTrans().getCharacter(
+ StringId.CLOSE_DOUBLE_QUOTE);
+
+	/**
+	 * Create a {@link Chapter} object from the given information, formatting
+	 * the content as it should be.
+	 * <p>
+	 * The chapter name is cleaned up: the redundant "Chapter" word (in any
+	 * configured language), the chapter number and the leading ":" or "-"
+	 * separators are removed from its start.
+	 *
+	 * @param support
+	 *            the linked {@link BasicSupport}
+	 * @param source
+	 *            the source of the story
+	 * @param number
+	 *            the chapter number
+	 * @param name
+	 *            the chapter name (NULL is handled as an empty name)
+	 * @param content
+	 *            the chapter content (NULL for a chapter without paragraphs)
+	 * @param html
+	 *            TRUE if the input content is in HTML mode
+	 * @param pg
+	 *            the optional progress reporter
+	 *
+	 * @return the {@link Chapter}
+	 *
+	 * @throws IOException
+	 *             in case of I/O error
+	 */
+	public static Chapter makeChapter(BasicSupport support, URL source,
+			int number, String name, String content, boolean html, Progress pg)
+			throws IOException {
+		// Chapter name: process it correctly, then remove the possible
+		// redundant "Chapter x: " in front of it, or "-" (as in
+		// "Chapter 5: - Fun!" after the ": " was automatically added)
+		String chapterName = BasicSupportPara
+				.processPara(name == null ? "" : name, false).getContent()
+				.trim();
+		for (String lang : Instance.getConfig().getString(Config.CHAPTER)
+				.split(",")) {
+			String chapterWord = Instance.getConfig().getStringX(
+					Config.CHAPTER, lang);
+			if (chapterName.startsWith(chapterWord)) {
+				chapterName = chapterName.substring(chapterWord.length())
+						.trim();
+				break;
+			}
+		}
+
+		// Only remove the chapter number when it is a number of its own:
+		// "1: Start" must lose its "1", but "10 Things" (chapter 1) or
+		// "1st Day" must be left alone
+		String numberText = Integer.toString(number);
+		if (chapterName.startsWith(numberText)) {
+			int end = numberText.length();
+			boolean wholeNumber = end >= chapterName.length()
+					|| !Character.isLetterOrDigit(chapterName.charAt(end));
+			if (wholeNumber) {
+				chapterName = chapterName.substring(end).trim();
+			}
+		}
+
+		while (chapterName.startsWith(":") || chapterName.startsWith("-")) {
+			chapterName = chapterName.substring(1).trim();
+		}
+		//
+
+		Chapter chap = new Chapter(number, chapterName);
+
+		if (content != null) {
+			List<Paragraph> paras = makeParagraphs(support, source, content,
+					html, pg);
+			long words = 0;
+			for (Paragraph para : paras) {
+				words += para.getWords();
+			}
+			chap.setParagraphs(paras);
+			chap.setWords(words);
+		}
+
+		return chap;
+	}
+
+	/**
+	 * Look for badly formatted quotes (i.e., quotes followed by normal text in
+	 * the same paragraph) and split them into a QUOTE paragraph and whatever
+	 * the rest of the text is (quotes or not).
+	 * <p>
+	 * Paragraphs that are not quotes, that are too short or that contain more
+	 * than one quote are returned as is.
+	 *
+	 * @param para
+	 *            the paragraph to requotify (not necessarily a quote)
+	 * @param html
+	 *            TRUE if the input content is in HTML mode
+	 *
+	 * @return the correctly (or so we hope) quotified paragraphs
+	 */
+	private static List<Paragraph> requotify(Paragraph para, boolean html) {
+		List<Paragraph> result = new ArrayList<Paragraph>();
+
+		// Only quotes that are long enough are candidates
+		if (para.getType() != ParagraphType.QUOTE
+				|| para.getContent().length() <= 2) {
+			result.add(para);
+			return result;
+		}
+
+		String text = para.getContent();
+		boolean startsSingle = text.startsWith("" + openQuote);
+		boolean startsDouble = text.startsWith("" + openDoubleQuote);
+
+		// Do not try when more than one quote at a time
+		// (some stories are not easily readable if we do)
+		boolean manySingles = startsSingle
+				&& text.indexOf(closeQuote, 1) < text.lastIndexOf(closeQuote);
+		boolean manyDoubles = startsDouble
+				&& text.indexOf(closeDoubleQuote, 1) < text
+						.lastIndexOf(closeDoubleQuote);
+		if (manySingles || manyDoubles) {
+			result.add(para);
+			return result;
+		}
+
+		// A quote without quote chars: simply wrap it
+		if (!startsSingle && !startsDouble) {
+			String wrapped = openDoubleQuote + text + closeDoubleQuote;
+			result.add(new Paragraph(ParagraphType.QUOTE, wrapped, para
+					.getWords()));
+			return result;
+		}
+
+		char opening = startsSingle ? openQuote : openDoubleQuote;
+		char closing = startsSingle ? closeQuote : closeDoubleQuote;
+
+		// Find the first dot which is outside of the quote chars
+		int dotAt = -1;
+		boolean inside = false;
+		for (int pos = 0; pos < text.length() && dotAt < 0; pos++) {
+			char current = text.charAt(pos);
+			if (current == opening) {
+				inside = true;
+			} else if (current == closing) {
+				inside = false;
+			} else if (current == '.' && !inside) {
+				dotAt = pos;
+			}
+		}
+
+		if (dotAt < 0) {
+			result.add(para);
+			return result;
+		}
+
+		// Split after the dot: the head stays a quote, the tail is processed
+		// again from scratch
+		String head = text.substring(0, dotAt + 1).trim();
+		String tail = text.substring(dotAt + 1).trim();
+
+		long headWords = 1;
+		for (int pos = 0; pos < head.length(); pos++) {
+			if (head.charAt(pos) == ' ') {
+				headWords++;
+			}
+		}
+
+		result.add(new Paragraph(ParagraphType.QUOTE, head, headWords));
+		if (!tail.isEmpty()) {
+			result.addAll(requotify(processPara(tail, html), html));
+		}
+
+		return result;
+	}
+
+	/**
+	 * Process a {@link Paragraph} from a raw line of text.
+	 * <p>
+	 * Will also fix quotes and HTML encoding if needed: the different kinds
+	 * of quote characters are converted to the configured ones, runs of 2 or
+	 * more dashes become an mdash and every kind of white space becomes a
+	 * simple space.
+	 * <p>
+	 * The line is always trimmed before being processed, so leading or
+	 * trailing white spaces never count as words.
+	 *
+	 * @param line
+	 *            the raw line (NULL is handled as an empty line)
+	 * @param html
+	 *            TRUE if the input content is in HTML mode
+	 *
+	 * @return the processed {@link Paragraph}
+	 */
+	private static Paragraph processPara(String line, boolean html) {
+		if (line == null) {
+			line = "";
+		}
+
+		if (html) {
+			line = StringUtils.unhtml(line);
+		}
+
+		// Trim in every mode, or a trailing white space would be counted as
+		// an additional word
+		line = line.trim();
+
+		boolean space = true;
+		boolean brk = true;
+		boolean quote = false;
+		boolean tentativeCloseQuote = false;
+		char prev = '\0';
+		int dashCount = 0;
+		long words = 1;
+
+		// NOTE(review): dashes still pending when the line ends are never
+		// written to the output; this looks unintended but is kept as is
+		StringBuilder builder = new StringBuilder();
+		for (char car : line.toCharArray()) {
+			if (car != '-') {
+				if (dashCount > 0) {
+					// dash, ndash and mdash: - – —
+					// currently: always use mdash
+					builder.append(dashCount == 1 ? '-' : '—');
+				}
+				dashCount = 0;
+			}
+
+			// The previous char was a simple quote that could have been an
+			// apostrophe or a closing quote: we can now decide
+			if (tentativeCloseQuote) {
+				tentativeCloseQuote = false;
+				if (Character.isLetterOrDigit(car)) {
+					builder.append("'");
+				} else {
+					// handle double-single quotes as double quotes
+					if (prev == car) {
+						builder.append(closeDoubleQuote);
+						continue;
+					}
+
+					builder.append(closeQuote);
+				}
+			}
+
+			switch (car) {
+			case '\u00A0': // unbreakable space (escaped: invisible otherwise)
+			case ' ':
+			case '\t':
+			case '\n': // just in case
+			case '\r': // just in case
+				if (builder.length() > 0
+						&& builder.charAt(builder.length() - 1) != ' ') {
+					words++;
+				}
+				builder.append(' ');
+				break;
+
+			case '\'':
+				if (space || (brk && quote)) {
+					quote = true;
+					// handle double-single quotes as double quotes
+					if (prev == car) {
+						builder.deleteCharAt(builder.length() - 1);
+						builder.append(openDoubleQuote);
+					} else {
+						builder.append(openQuote);
+					}
+				} else if (prev == ' ' || prev == car) {
+					// handle double-single quotes as double quotes
+					if (prev == car) {
+						builder.deleteCharAt(builder.length() - 1);
+						builder.append(openDoubleQuote);
+					} else {
+						builder.append(openQuote);
+					}
+				} else {
+					// it is a quote ("I'm off") or a 'quote' ("This
+					// 'good' restaurant"...)
+					tentativeCloseQuote = true;
+				}
+				break;
+
+			case '"':
+				if (space || (brk && quote)) {
+					quote = true;
+					builder.append(openDoubleQuote);
+				} else if (prev == ' ') {
+					builder.append(openDoubleQuote);
+				} else {
+					builder.append(closeDoubleQuote);
+				}
+				break;
+
+			case '-':
+				if (space) {
+					// a dash at the start of the line introduces a quote
+					quote = true;
+				} else {
+					dashCount++;
+				}
+				space = false;
+				break;
+
+			case '*':
+			case '~':
+			case '/':
+			case '\\':
+			case '<':
+			case '>':
+			case '=':
+			case '+':
+			case '_':
+			case '–':
+			case '—':
+				// those do not prevent the line from being a BREAK
+				space = false;
+				builder.append(car);
+				break;
+
+			case '‘':
+			case '`':
+			case '‹':
+			case '﹁':
+			case '〈':
+			case '「':
+				if (space || (brk && quote)) {
+					quote = true;
+					builder.append(openQuote);
+				} else {
+					// handle double-single quotes as double quotes
+					if (prev == car) {
+						builder.deleteCharAt(builder.length() - 1);
+						builder.append(openDoubleQuote);
+					} else {
+						builder.append(openQuote);
+					}
+				}
+				space = false;
+				brk = false;
+				break;
+
+			case '’':
+			case '›':
+			case '﹂':
+			case '〉':
+			case '」':
+				space = false;
+				brk = false;
+				// handle double-single quotes as double quotes
+				if (prev == car) {
+					builder.deleteCharAt(builder.length() - 1);
+					builder.append(closeDoubleQuote);
+				} else {
+					builder.append(closeQuote);
+				}
+				break;
+
+			case '«':
+			case '“':
+			case '﹃':
+			case '《':
+			case '『':
+				if (space || (brk && quote)) {
+					quote = true;
+				}
+				builder.append(openDoubleQuote);
+				space = false;
+				brk = false;
+				break;
+
+			case '»':
+			case '”':
+			case '﹄':
+			case '》':
+			case '』':
+				space = false;
+				brk = false;
+				builder.append(closeDoubleQuote);
+				break;
+
+			default:
+				space = false;
+				brk = false;
+				builder.append(car);
+				break;
+			}
+
+			prev = car;
+		}
+
+		if (tentativeCloseQuote) {
+			tentativeCloseQuote = false;
+			builder.append(closeQuote);
+		}
+
+		line = builder.toString().trim();
+
+		// Nothing but white spaces: BLANK; nothing but separators: BREAK
+		ParagraphType type = ParagraphType.NORMAL;
+		if (space) {
+			type = ParagraphType.BLANK;
+		} else if (brk) {
+			type = ParagraphType.BREAK;
+		} else if (quote) {
+			type = ParagraphType.QUOTE;
+		}
+
+		return new Paragraph(type, line, words);
+	}
+
+	/**
+	 * Convert the given content into {@link Paragraph}s.
+	 * <p>
+	 * In HTML mode, the content is split on the paragraph and line break tags
+	 * (and the horizontal rules are converted into "* * *" lines); in text
+	 * mode, it is split on the line breaks. The quotes are then checked for
+	 * bad format and the redundant blanks/breaks are removed.
+	 *
+	 * @param support
+	 *            the linked {@link BasicSupport}
+	 * @param source
+	 *            the source URL of the story
+	 * @param content
+	 *            the textual content (NULL or blank content results in an
+	 *            empty list)
+	 * @param html
+	 *            TRUE if the input content is in HTML mode
+	 * @param pg
+	 *            the optional progress reporter
+	 *
+	 * @return the {@link Paragraph}s
+	 *
+	 * @throws IOException
+	 *             in case of I/O error
+	 */
+	private static List<Paragraph> makeParagraphs(BasicSupport support,
+			URL source, String content, boolean html, Progress pg)
+			throws IOException {
+		if (pg == null) {
+			pg = new Progress();
+		}
+
+		List<Paragraph> paras = new ArrayList<Paragraph>();
+
+		// Checked before anything else, so a NULL content is never
+		// dereferenced (even in HTML mode)
+		if (content == null || content.trim().isEmpty()) {
+			return paras;
+		}
+
+		// Split the content into trimmed lines
+		List<String> lines = new ArrayList<String>();
+		if (html) {
+			// Special <HR> processing:
+			content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
+					"<br/>* * *<br/>");
+			for (String line : content.split("(<p>|</p>|<br>|<br/>)")) {
+				lines.add(line.trim());
+			}
+		} else {
+			BufferedReader buff = null;
+			try {
+				buff = new BufferedReader(
+						new InputStreamReader(new ByteArrayInputStream(
+								content.getBytes("UTF-8")), "UTF-8"));
+				for (String line = buff.readLine(); line != null; line = buff
+						.readLine()) {
+					lines.add(line.trim());
+				}
+			} finally {
+				if (buff != null) {
+					buff.close();
+				}
+			}
+		}
+
+		// The progress numbering is kept as it was: 1-based in HTML mode,
+		// 0-based in text mode
+		int i = html ? 1 : 0;
+		pg.setMinMax(0, lines.size());
+		for (String line : lines) {
+			if (line.startsWith("[") && line.endsWith("]")) {
+				pg.setName("Extracting image " + i);
+			}
+			paras.add(makeParagraph(support, source, line, html));
+			pg.setProgress(i++);
+		}
+		pg.setName(null);
+
+		// Check quotes for "bad" format
+		List<Paragraph> newParas = new ArrayList<Paragraph>();
+		for (Paragraph para : paras) {
+			newParas.addAll(BasicSupportPara.requotify(para, html));
+		}
+		paras = newParas;
+
+		// Remove double blanks/brks
+		fixBlanksBreaks(paras);
+
+		return paras;
+	}
+
+	/**
+	 * Convert the given line into a single {@link Paragraph}.
+	 * <p>
+	 * A line enclosed in square brackets is first tried as an image resource;
+	 * if no image can be found for it, it is processed as text like any other
+	 * line.
+	 *
+	 * @param support
+	 *            the linked {@link BasicSupport}
+	 * @param source
+	 *            the source URL of the story
+	 * @param line
+	 *            the textual content of the paragraph
+	 * @param html
+	 *            TRUE if the input content is in HTML mode
+	 *
+	 * @return the {@link Paragraph}
+	 */
+	private static Paragraph makeParagraph(BasicSupport support, URL source,
+			String line, boolean html) {
+		boolean bracketed = line.startsWith("[") && line.endsWith("]");
+		if (bracketed) {
+			String resource = line.substring(1, line.length() - 1).trim();
+			Image image = BasicSupportHelper.getImage(support, source,
+					resource);
+			if (image != null) {
+				return new Paragraph(image);
+			}
+		}
+
+		return BasicSupportPara.processPara(line, html);
+	}
+
+	/**
+	 * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
+	 * those {@link Paragraph}s.
+	 * <p>
+	 * The resulting list will not contain a starting or trailing blank/break
+	 * nor 2 blanks or breaks following each other.
+	 * <p>
+	 * The list is modified in place.
+	 *
+	 * @param paras
+	 *            the list of {@link Paragraph}s to fix
+	 */
+	private static void fixBlanksBreaks(List<Paragraph> paras) {
+		// Type of the last visited paragraph; starting with "brk" set makes a
+		// blank/break in first position be handled as a redundant one
+		boolean space = false;
+		boolean brk = true;
+		for (int i = 0; i < paras.size(); i++) {
+			Paragraph para = paras.get(i);
+			boolean thisSpace = para.getType() == ParagraphType.BLANK;
+			boolean thisBrk = para.getType() == ParagraphType.BREAK;
+
+			if (i > 0 && space && thisBrk) {
+				// a break visited right after a blank: the element just
+				// before this one is removed and the break is kept
+				paras.remove(i - 1);
+				i--;
+			} else if ((space || brk) && (thisSpace || thisBrk)) {
+				// a blank/break visited right after another blank/break:
+				// this one is removed
+				paras.remove(i);
+				i--;
+			}
+
+			// the flags follow the visited paragraph, even if it was removed
+			space = thisSpace;
+			brk = thisBrk;
+		}
+
+		// Remove blank/brk at start
+		if (paras.size() > 0
+				&& (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
+						0).getType() == ParagraphType.BREAK)) {
+			paras.remove(0);
+		}
+
+		// Remove blank/brk at end
+		int last = paras.size() - 1;
+		if (paras.size() > 0
+				&& (paras.get(last).getType() == ParagraphType.BLANK || paras
+						.get(last).getType() == ParagraphType.BREAK)) {
+			paras.remove(last);
+		}
+	}
+}
--- /dev/null
+package be.nikiroo.fanfix.supported;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.net.MalformedURLException;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.Date;
+import java.util.List;
+import java.util.Map.Entry;
+import java.util.Scanner;
+
+import be.nikiroo.fanfix.Instance;
+import be.nikiroo.fanfix.bundles.Config;
+import be.nikiroo.fanfix.bundles.StringId;
+import be.nikiroo.fanfix.data.Chapter;
+import be.nikiroo.fanfix.data.MetaData;
+import be.nikiroo.fanfix.data.Paragraph;
+import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
+import be.nikiroo.fanfix.data.Story;
+import be.nikiroo.utils.Image;
+import be.nikiroo.utils.Progress;
+import be.nikiroo.utils.StringUtils;
+
+/**
+ * DEPRECATED: use the new Jsoup 'Node' system.
+ * <p>
+ * This class is the base class used by the other support classes. It can be
+ * used outside of this package, and has static methods that you can use to
+ * get access to the correct support class.
+ * <p>
+ * It will be used with 'resources' (usually web pages or files).
+ *
+ * @author niki
+ */
+@Deprecated
+public abstract class BasicSupport_Deprecated extends BasicSupport {
+ // The main resource of the story, as returned by openInput(URL) in
+ // processMeta(...); may be NULL.
+ private InputStream in;
+ // NOTE(review): not referenced in the visible part of this class -- confirm
+ // whether it is still needed.
+ private URL currentReferer; // with only one 'r', as in 'HTTP'...
+
+ // quote chars: the opening/closing single and double quote characters read
+ // from the translation bundle, used when normalising quotes in paragraphs
+ private char openQuote = Instance.getTrans().getCharacter(
+ StringId.OPEN_SINGLE_QUOTE);
+ private char closeQuote = Instance.getTrans().getCharacter(
+ StringId.CLOSE_SINGLE_QUOTE);
+ private char openDoubleQuote = Instance.getTrans().getCharacter(
+ StringId.OPEN_DOUBLE_QUOTE);
+ private char closeDoubleQuote = Instance.getTrans().getCharacter(
+ StringId.CLOSE_DOUBLE_QUOTE);
+
+ // New methods not used in Deprecated mode
+ /**
+  * Not supported by legacy supports: they implement
+  * {@link BasicSupport_Deprecated#getDesc(URL, InputStream)} instead.
+  *
+  * @throws UnsupportedOperationException
+  *             always
+  */
+ @Override
+ protected String getDesc() throws IOException {
+     throw new UnsupportedOperationException(
+             "should not be used by legacy code");
+ }
+
+ /**
+  * Not supported by legacy supports: they implement
+  * {@link BasicSupport_Deprecated#getMeta(URL, InputStream)} instead.
+  *
+  * @throws UnsupportedOperationException
+  *             always
+  */
+ @Override
+ protected MetaData getMeta() throws IOException {
+     throw new UnsupportedOperationException(
+             "should not be used by legacy code");
+ }
+
+ /**
+  * Not supported by legacy supports: they implement
+  * {@link BasicSupport_Deprecated#getChapters(URL, InputStream, Progress)}
+  * instead.
+  *
+  * @throws UnsupportedOperationException
+  *             always
+  */
+ @Override
+ protected List<Entry<String, URL>> getChapters(Progress pg)
+         throws IOException {
+     throw new UnsupportedOperationException(
+             "should not be used by legacy code");
+ }
+
+ /**
+  * Not supported by legacy supports: they implement
+  * {@link BasicSupport_Deprecated#getChapterContent(URL, InputStream, int, Progress)}
+  * instead.
+  *
+  * @throws UnsupportedOperationException
+  *             always
+  */
+ @Override
+ protected String getChapterContent(URL chapUrl, int number, Progress pg)
+         throws IOException {
+     throw new UnsupportedOperationException(
+             "should not be used by legacy code");
+ }
+
+ @Override
+ public Story process(Progress pg) throws IOException {
+ return process(getSource(), pg);
+ }
+
+ //
+
+ /**
+ * Return the {@link MetaData} of this story.
+ *
+ * @param source
+ * the (canonical) source of the story
+ * @param in
+ * the input (the main resource), reset before each call; can be
+ * NULL if the support does not open an input
+ *
+ * @return the associated {@link MetaData}, never NULL
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ protected abstract MetaData getMeta(URL source, InputStream in)
+ throws IOException;
+
+ /**
+ * Return the story description.
+ * <p>
+ * The returned text is turned into the "resume" chapter (number 0) of the
+ * story metadata.
+ *
+ * @param source
+ * the (canonical) source of the story
+ * @param in
+ * the input (the main resource), reset before each call; can be
+ * NULL if the support does not open an input
+ *
+ * @return the description
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ protected abstract String getDesc(URL source, InputStream in)
+ throws IOException;
+
+ /**
+ * Return the list of chapters (name and resource).
+ * <p>
+ * An entry may have a NULL resource, in which case no input is opened for
+ * that chapter.
+ *
+ * @param source
+ * the (canonical) source of the story
+ * @param in
+ * the input (the main resource), reset before the call; can be
+ * NULL if the support does not open an input
+ * @param pg
+ * the optional progress reporter
+ *
+ * @return the chapters, or NULL if there are none
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ protected abstract List<Entry<String, URL>> getChapters(URL source,
+ InputStream in, Progress pg) throws IOException;
+
+ /**
+ * Return the content of the chapter (possibly HTML encoded, if
+ * {@link BasicSupport_Deprecated#isHtml()} is TRUE).
+ *
+ * @param source
+ * the (canonical) source of the story
+ * @param in
+ * the input opened on the chapter resource (not the main
+ * resource); NULL when the chapter has no resource URL
+ * @param number
+ * the chapter number (starting at 1)
+ * @param pg
+ * the optional progress reporter
+ *
+ * @return the content
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ protected abstract String getChapterContent(URL source, InputStream in,
+ int number, Progress pg) throws IOException;
+
+ /**
+  * Read only the name and metadata of the given story resource (the
+  * description is <b>not</b> retrieved), then close this support and its
+  * input.
+  *
+  * @param url
+  *            the story resource
+  *
+  * @return the partially filled {@link Story}
+  *
+  * @throws IOException
+  *             in case of I/O error
+  */
+ public Story processMeta(URL url) throws IOException {
+     final boolean close = true;
+     final boolean getDesc = false;
+     return processMeta(url, close, getDesc, null);
+ }
+
+ /**
+  * Build a {@link Story} that only holds the metadata of the given story
+  * resource and, on request, its description.
+  * <p>
+  * The main input is (re)opened by this method and kept in this support so
+  * that a caller passing <tt>close = false</tt> can go on using it.
+  *
+  * @param url
+  *            the story resource
+  * @param close
+  *            close "this" and "in" when done
+  * @param getDesc
+  *            retrieve the description of the story, or not
+  * @param pg
+  *            the optional progress reporter
+  *
+  * @return the {@link Story}, never NULL
+  *
+  * @throws IOException
+  *             in case of I/O error
+  */
+ protected Story processMeta(URL url, boolean close, boolean getDesc,
+         Progress pg) throws IOException {
+     if (pg != null) {
+         pg.setMinMax(0, 100);
+     } else {
+         pg = new Progress();
+     }
+
+     login();
+     pg.setProgress(10);
+
+     url = getCanonicalUrl(url);
+     setCurrentReferer(url);
+
+     in = openInput(url); // a NULL input is accepted
+     try {
+         preprocess(url, getInput());
+         pg.setProgress(30);
+
+         Story story = new Story();
+         MetaData meta = getMeta(url, getInput());
+         boolean hasDate = meta.getCreationDate() != null
+                 && !meta.getCreationDate().isEmpty();
+         if (!hasDate) {
+             // no date given by the support: use "now"
+             long now = new Date().getTime();
+             meta.setCreationDate(StringUtils.fromTime(now));
+         }
+         story.setMeta(meta);
+         pg.setProgress(50);
+
+         if (meta.getCover() == null) {
+             Image defaultCover = getDefaultCover(meta.getSubject());
+             meta.setCover(defaultCover);
+         }
+         pg.setProgress(60);
+
+         if (getDesc) {
+             String descChapterName = Instance.getTrans().getString(
+                     StringId.DESCRIPTION);
+             String desc = getDesc(url, getInput());
+             Chapter resume = makeChapter(url, 0, descChapterName, desc,
+                     null);
+             story.getMeta().setResume(resume);
+         }
+
+         pg.setProgress(100);
+         return story;
+     } finally {
+         if (close) {
+             close();
+
+             if (in != null) {
+                 in.close();
+             }
+         }
+     }
+ }
+
+ /**
+ * Process the given story resource into a fully filled {@link Story}
+ * object.
+ * <p>
+ * The work is done in three steps, each one reported on its own
+ * sub-{@link Progress}: the metadata and description (weight 10), the list
+ * of chapters (weight 10), then the chapters themselves (weight 80).
+ * <p>
+ * This support and its main input are always closed when the method
+ * returns, even on error.
+ *
+ * @param url
+ * the story resource
+ * @param pg
+ * the optional progress reporter
+ *
+ * @return the {@link Story}, never NULL
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ protected Story process(URL url, Progress pg) throws IOException {
+ if (pg == null) {
+ pg = new Progress();
+ } else {
+ pg.setMinMax(0, 100);
+ }
+
+ url = getCanonicalUrl(url);
+ pg.setProgress(1);
+ try {
+ // Step 1: metadata + description; "close" is false because the main
+ // input is still needed below
+ Progress pgMeta = new Progress();
+ pg.addProgress(pgMeta, 10);
+ Story story = processMeta(url, false, true, pgMeta);
+ if (!pgMeta.isDone()) {
+ pgMeta.setProgress(pgMeta.getMax()); // 10%
+ }
+
+ pg.setName("Retrieving " + story.getMeta().getTitle());
+
+ setCurrentReferer(url);
+
+ // Step 2: list of chapters
+ Progress pgGetChapters = new Progress();
+ pg.addProgress(pgGetChapters, 10);
+ story.setChapters(new ArrayList<Chapter>());
+ List<Entry<String, URL>> chapters = getChapters(url, getInput(),
+ pgGetChapters);
+ if (!pgGetChapters.isDone()) {
+ pgGetChapters.setProgress(pgGetChapters.getMax()); // 20%
+ }
+
+ // Step 3: content of each chapter
+ if (chapters != null) {
+ Progress pgChaps = new Progress("Extracting chapters", 0,
+ chapters.size() * 300);
+ pg.addProgress(pgChaps, 80);
+
+ long words = 0;
+ int i = 1;
+ for (Entry<String, URL> chap : chapters) {
+ pgChaps.setName("Extracting chapter " + i);
+ // A chapter without URL gets no input of its own
+ InputStream chapIn = null;
+ if (chap.getValue() != null) {
+ setCurrentReferer(chap.getValue());
+ chapIn = Instance.getCache().open(chap.getValue(),
+ this, false);
+ }
+ pgChaps.setProgress(i * 100);
+ try {
+ Progress pgGetChapterContent = new Progress();
+ Progress pgMakeChapter = new Progress();
+ pgChaps.addProgress(pgGetChapterContent, 100);
+ pgChaps.addProgress(pgMakeChapter, 100);
+
+ // Note: the chapter input is passed here, not the main one
+ String content = getChapterContent(url, chapIn, i,
+ pgGetChapterContent);
+ if (!pgGetChapterContent.isDone()) {
+ pgGetChapterContent.setProgress(pgGetChapterContent
+ .getMax());
+ }
+
+ Chapter cc = makeChapter(url, i, chap.getKey(),
+ content, pgMakeChapter);
+ if (!pgMakeChapter.isDone()) {
+ pgMakeChapter.setProgress(pgMakeChapter.getMax());
+ }
+
+ // Keep the running word count of the story up to date
+ words += cc.getWords();
+ story.getChapters().add(cc);
+ story.getMeta().setWords(words);
+ } finally {
+ if (chapIn != null) {
+ chapIn.close();
+ }
+ }
+
+ i++;
+ }
+
+ pgChaps.setName("Extracting chapters");
+ } else {
+ // NOTE(review): no chapters, yet the progress is set to 80 and
+ // not to the maximum (100) -- confirm this is intended
+ pg.setProgress(80);
+ }
+
+ return story;
+
+ } finally {
+ close();
+
+ if (in != null) {
+ in.close();
+ }
+ }
+ }
+
+ /**
+ * Prepare the support if needed before processing.
+ * <p>
+ * Called by {@link BasicSupport_Deprecated#processMeta(URL, boolean, boolean, Progress)}
+ * once the main input has been opened and before the metadata is read. The
+ * default implementation does nothing.
+ *
+ * @param source
+ * the (canonical) source of the story
+ * @param in
+ * the input (the main resource), can be NULL
+ *
+ * @throws IOException
+ * on I/O error
+ */
+ @SuppressWarnings("unused")
+ protected void preprocess(URL source, InputStream in) throws IOException {
+ }
+
+ /**
+  * Create a {@link Chapter} object from the given information, formatting
+  * the content as it should be.
+  * <p>
+  * The chapter name is cleaned up: a leading "Chapter" word (in any of the
+  * configured languages), the chapter number itself and any leading ":" or
+  * "-" separators are removed, so "Chapter 5: - Fun!" becomes "Fun!".
+  * <p>
+  * The chapter number is only removed when it stands alone, i.e., when it
+  * is not directly followed by a letter or another digit: for chapter 1, a
+  * name such as "100 Years" or "1st day" is left untouched.
+  *
+  * @param source
+  *            the source of the story
+  * @param number
+  *            the chapter number
+  * @param name
+  *            the chapter name
+  * @param content
+  *            the chapter content, or NULL for a chapter without
+  *            paragraphs
+  * @param pg
+  *            the optional progress reporter
+  *
+  * @return the {@link Chapter}
+  *
+  * @throws IOException
+  *             in case of I/O error
+  */
+ protected Chapter makeChapter(URL source, int number, String name,
+         String content, Progress pg) throws IOException {
+     // Chapter name: process it correctly, then remove the possible
+     // redundant "Chapter x: " in front of it, or "-" (as in
+     // "Chapter 5: - Fun!" after the ": " was automatically added)
+     String chapterName = processPara(name).getContent().trim();
+     for (String lang : Instance.getConfig().getString(Config.CHAPTER)
+             .split(",")) {
+         String chapterWord = Instance.getConfig().getStringX(
+                 Config.CHAPTER, lang);
+         if (chapterName.startsWith(chapterWord)) {
+             chapterName = chapterName.substring(chapterWord.length())
+                     .trim();
+             break;
+         }
+     }
+
+     // Only strip the number when it is a full token, so that the "1" of
+     // "100 Years" is not eaten for chapter 1
+     String numberText = Integer.toString(number);
+     if (chapterName.startsWith(numberText)) {
+         String afterNumber = chapterName.substring(numberText.length());
+         if (afterNumber.isEmpty()
+                 || !Character.isLetterOrDigit(afterNumber.charAt(0))) {
+             chapterName = afterNumber.trim();
+         }
+     }
+
+     while (chapterName.startsWith(":") || chapterName.startsWith("-")) {
+         chapterName = chapterName.substring(1).trim();
+     }
+     //
+
+     Chapter chap = new Chapter(number, chapterName);
+
+     if (content != null) {
+         List<Paragraph> paras = makeParagraphs(source, content, pg);
+         long words = 0;
+         for (Paragraph para : paras) {
+             words += para.getWords();
+         }
+         chap.setParagraphs(paras);
+         chap.setWords(words);
+     }
+
+     return chap;
+ }
+
+ /**
+  * Convert the given content into {@link Paragraph}s.
+  * <p>
+  * In HTML mode, the content is split on paragraph and line-break tags
+  * (horizontal rules are first converted into a "* * *" line); in text
+  * mode, it is split on line ends. Each resulting line becomes one
+  * paragraph, then badly formed quotes are split and redundant
+  * blanks/breaks are removed.
+  *
+  * @param source
+  *            the source URL of the story
+  * @param content
+  *            the textual content (NULL or blank content yields an empty
+  *            list)
+  * @param pg
+  *            the optional progress reporter
+  *
+  * @return the {@link Paragraph}s, never NULL
+  *
+  * @throws IOException
+  *             in case of I/O error
+  */
+ protected List<Paragraph> makeParagraphs(URL source, String content,
+         Progress pg) throws IOException {
+     if (pg == null) {
+         pg = new Progress();
+     }
+
+     List<Paragraph> paras = new ArrayList<Paragraph>();
+
+     // Checked before any processing, so NULL content cannot reach the
+     // <HR> replacement done in HTML mode
+     if (content == null || content.trim().isEmpty()) {
+         return paras;
+     }
+
+     if (isHtml()) {
+         // Special <HR> processing:
+         content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
+                 "<br/>* * *<br/>");
+
+         String[] tab = content.split("(<p>|</p>|<br>|<br/>)");
+         pg.setMinMax(0, tab.length);
+         int i = 1;
+         for (String rawLine : tab) {
+             // Trim first so the image check sees the same text as
+             // makeParagraph(...)
+             String line = rawLine.trim();
+             if (line.startsWith("[") && line.endsWith("]")) {
+                 pg.setName("Extracting image " + i);
+             }
+             paras.add(makeParagraph(source, line));
+             pg.setProgress(i++);
+         }
+         pg.setName(null);
+     } else {
+         List<String> lines = new ArrayList<String>();
+         BufferedReader buff = null;
+         try {
+             buff = new BufferedReader(
+                     new InputStreamReader(new ByteArrayInputStream(
+                             content.getBytes("UTF-8")), "UTF-8"));
+             for (String line = buff.readLine(); line != null; line = buff
+                     .readLine()) {
+                 lines.add(line.trim());
+             }
+         } finally {
+             if (buff != null) {
+                 buff.close();
+             }
+         }
+
+         pg.setMinMax(0, lines.size());
+         // 1-based, as in HTML mode, so the last line reaches the maximum
+         int i = 1;
+         for (String line : lines) {
+             if (line.startsWith("[") && line.endsWith("]")) {
+                 pg.setName("Extracting image " + i);
+             }
+             paras.add(makeParagraph(source, line));
+             pg.setProgress(i++);
+         }
+         pg.setName(null);
+     }
+
+     // Check quotes for "bad" format
+     List<Paragraph> newParas = new ArrayList<Paragraph>();
+     for (Paragraph para : paras) {
+         newParas.addAll(requotify(para));
+     }
+     paras = newParas;
+
+     // Remove double blanks/brks
+     fixBlanksBreaks(paras);
+
+     return paras;
+ }
+
+ /**
+  * Build one {@link Paragraph} out of a single line of content.
+  * <p>
+  * A line fully enclosed in square brackets is first tried as an image
+  * reference; when no image can be obtained from it, the line is processed
+  * as regular text.
+  *
+  * @param source
+  *            the source URL of the story
+  * @param line
+  *            the textual content of the paragraph
+  *
+  * @return the {@link Paragraph}
+  */
+ private Paragraph makeParagraph(URL source, String line) {
+     boolean bracketed = line.startsWith("[") && line.endsWith("]");
+     if (bracketed) {
+         String reference = line.substring(1, line.length() - 1).trim();
+         Image image = getImage(this, source, reference);
+         if (image != null) {
+             return new Paragraph(image);
+         }
+     }
+
+     return processPara(line);
+ }
+
+ /**
+  * Clean up the {@link ParagraphType#BLANK} and {@link ParagraphType#BREAK}
+  * entries of the given list, in place.
+  * <p>
+  * After the call, the list neither starts nor ends with a blank/break, and
+  * never contains two blanks/breaks in a row (a break wins over the blank
+  * that precedes it).
+  *
+  * @param paras
+  *            the list of {@link Paragraph}s to fix
+  */
+ protected void fixBlanksBreaks(List<Paragraph> paras) {
+     // Start as if a break had just been seen, so leading blanks/breaks go
+     boolean prevBlank = false;
+     boolean prevBreak = true;
+
+     int idx = 0;
+     while (idx < paras.size()) {
+         ParagraphType type = paras.get(idx).getType();
+         boolean curBlank = type == ParagraphType.BLANK;
+         boolean curBreak = type == ParagraphType.BREAK;
+
+         if (idx > 0 && prevBlank && curBreak) {
+             // blank followed by a break: drop the blank; the element
+             // that followed the break slides into position idx
+             paras.remove(idx - 1);
+         } else if ((prevBlank || prevBreak) && (curBlank || curBreak)) {
+             // second blank/break in a row: drop the current one
+             paras.remove(idx);
+         } else {
+             idx++;
+         }
+
+         prevBlank = curBlank;
+         prevBreak = curBreak;
+     }
+
+     // Remove blank/brk at start
+     if (!paras.isEmpty()) {
+         ParagraphType firstType = paras.get(0).getType();
+         if (firstType == ParagraphType.BLANK
+                 || firstType == ParagraphType.BREAK) {
+             paras.remove(0);
+         }
+     }
+
+     // Remove blank/brk at end
+     if (!paras.isEmpty()) {
+         int last = paras.size() - 1;
+         ParagraphType lastType = paras.get(last).getType();
+         if (lastType == ParagraphType.BLANK
+                 || lastType == ParagraphType.BREAK) {
+             paras.remove(last);
+         }
+     }
+ }
+
+ /**
+  * Look for the default cover of the given subject, i.e., an image named
+  * after the subject inside the configured cover directory (see
+  * <tt>.info</tt> files).
+  *
+  * @param subject
+  *            the subject
+  *
+  * @return the cover if any, or NULL
+  */
+ static Image getDefaultCover(String subject) {
+     if (subject == null || subject.isEmpty()
+             || Instance.getCoverDir() == null) {
+         return null;
+     }
+
+     try {
+         File fileCover = new File(Instance.getCoverDir(), subject);
+         URL coverUrl = fileCover.toURI().toURL();
+         return getImage(null, coverUrl, subject);
+     } catch (MalformedURLException e) {
+         // the subject cannot be turned into an URL: no default cover
+         return null;
+     }
+ }
+
+ /**
+  * Return the list of supported image extensions (each one starts with a
+  * dot).
+  *
+  * @param emptyAllowed
+  *            TRUE to also get an empty extension, in first position,
+  *            which is useful when the input may already contain its
+  *            extension
+  *
+  * @return the extensions, in a new array
+  */
+ static String[] getImageExt(boolean emptyAllowed) {
+     String[] known = { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
+     if (!emptyAllowed) {
+         return known;
+     }
+
+     String[] withEmpty = new String[known.length + 1];
+     withEmpty[0] = "";
+     System.arraycopy(known, 0, withEmpty, 1, known.length);
+     return withEmpty;
+ }
+
+ /**
+  * Check if the given resource can be a local image or a remote image, then
+  * refresh the cache with it if it is, and load it.
+  * <p>
+  * Directories and resources that cannot be read are reported as "not
+  * found".
+  *
+  * @param support
+  *            the support used to resolve the image URL (can be NULL)
+  * @param source
+  *            the story source
+  * @param line
+  *            the resource to check
+  *
+  * @return the image if found, or NULL
+  */
+ static Image getImage(BasicSupport_Deprecated support, URL source,
+         String line) {
+     URL url = getImageUrl(support, source, line);
+     if (url == null) {
+         return null;
+     }
+
+     boolean localDirectory = "file".equals(url.getProtocol())
+             && new File(url.getPath()).isDirectory();
+     if (localDirectory) {
+         return null;
+     }
+
+     InputStream in = null;
+     try {
+         in = Instance.getCache().open(url, getSupport(url), true);
+         return new Image(in);
+     } catch (IOException e) {
+         // unreadable image: same as "no image"
+         return null;
+     } finally {
+         if (in != null) {
+             try {
+                 in.close();
+             } catch (IOException e) {
+                 // nothing useful can be done here
+             }
+         }
+     }
+ }
+
+ /**
+  * Check if the given resource can be a local image or a remote image, then
+  * refresh the cache with it if it is.
+  * <p>
+  * The resource is first tried as a local file (as an absolute path, then
+  * relative to the directory of the story source), then as an URL. In both
+  * cases, every extension of {@link #getImageExt(boolean)} is tried in
+  * order and the first match wins, the empty extension (i.e., the resource
+  * as given) coming first.
+  *
+  * @param support
+  *            the support used to refresh the cache (can be NULL)
+  * @param source
+  *            the story source (can be NULL, in which case local files are
+  *            not tried)
+  * @param line
+  *            the resource to check
+  *
+  * @return the image URL if found, or NULL
+  */
+ static URL getImageUrl(BasicSupport_Deprecated support, URL source,
+         String line) {
+     if (line == null) {
+         return null;
+     }
+
+     URL url = null;
+
+     // try for files
+     if (source != null) {
+         try {
+             String relPath = null;
+             String absPath = null;
+             try {
+                 String path = new File(source.getFile()).getParent();
+                 relPath = new File(new File(path), line.trim())
+                         .getAbsolutePath();
+             } catch (Exception e) {
+                 // Cannot be converted to path (one possibility to take
+                 // into account: absolute path on Windows)
+             }
+             try {
+                 absPath = new File(line.trim()).getAbsolutePath();
+             } catch (Exception e) {
+                 // Cannot be converted to path (at all)
+             }
+
+             for (String ext : getImageExt(true)) {
+                 // Do not build bogus "null.png" files for unknown paths
+                 File absFile = absPath == null ? null : new File(absPath
+                         + ext);
+                 File relFile = relPath == null ? null : new File(relPath
+                         + ext);
+                 if (absFile != null && absFile.exists()
+                         && absFile.isFile()) {
+                     url = absFile.toURI().toURL();
+                 } else if (relFile != null && relFile.exists()
+                         && relFile.isFile()) {
+                     url = relFile.toURI().toURL();
+                 }
+
+                 // First match wins, as for URLs below
+                 if (url != null) {
+                     break;
+                 }
+             }
+         } catch (Exception e) {
+             // Should not happen since we control the correct arguments
+         }
+     }
+
+     if (url == null) {
+         // try for URLs
+         try {
+             for (String ext : getImageExt(true)) {
+                 if (Instance.getCache()
+                         .check(new URL(line + ext), true)) {
+                     url = new URL(line + ext);
+                     break;
+                 }
+             }
+
+             // try out of cache
+             if (url == null) {
+                 for (String ext : getImageExt(true)) {
+                     try {
+                         url = new URL(line + ext);
+                         Instance.getCache().refresh(url, support, true);
+                         break;
+                     } catch (IOException e) {
+                         // no image with this ext
+                         url = null;
+                     }
+                 }
+             }
+         } catch (MalformedURLException e) {
+             // Not an url
+         }
+     }
+
+     // refresh the cached file
+     if (url != null) {
+         try {
+             Instance.getCache().refresh(url, support, true);
+         } catch (IOException e) {
+             // woops, broken image
+             url = null;
+         }
+     }
+
+     return url;
+ }
+
+ /**
+  * Open the main input of the story, i.e., the one that will be used
+  * through the support.
+  * <p>
+  * Can return NULL, in which case you are supposed to work without an
+  * {@link InputStream}.
+  *
+  * @param source
+  *            the source {@link URL}
+  *
+  * @return the {@link InputStream}
+  *
+  * @throws IOException
+  *             in case of I/O error
+  */
+ protected InputStream openInput(URL source) throws IOException {
+     InputStream input = Instance.getCache().open(source, this, false);
+     return input;
+ }
+
+ /**
+  * Rewind {@link BasicSupport_Deprecated#in} (if any), then return it.
+  *
+  * @return {@link BasicSupport_Deprecated#in}, can be NULL
+  */
+ protected InputStream getInput() {
+     InputStream rewound = reset(in);
+     return rewound;
+ }
+
+ /**
+  * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
+  * and requotify them (i.e., separate them into QUOTE paragraphs and other
+  * paragraphs (quotes or not)).
+  * <p>
+  * A quote that does not start with a quote character is simply enclosed in
+  * double quotes. Otherwise, the quote is cut after the first dot found
+  * outside of the quote characters, and what remains is processed again.
+  * Paragraphs that are not quotes, quotes of 2 characters or less and
+  * quotes that contain more than one closing quote are returned untouched.
+  *
+  * @param para
+  *            the paragraph to requotify (not necessarily a quote)
+  *
+  * @return the correctly (or so we hope) quotified paragraphs
+  */
+ protected List<Paragraph> requotify(Paragraph para) {
+     List<Paragraph> result = new ArrayList<Paragraph>();
+
+     if (para.getType() != ParagraphType.QUOTE
+             || para.getContent().length() <= 2) {
+         result.add(para);
+         return result;
+     }
+
+     String text = para.getContent();
+     boolean startsSingle = text.startsWith("" + openQuote);
+     boolean startsDouble = text.startsWith("" + openDoubleQuote);
+
+     // Do not try when more than one quote at a time
+     // (some stories are not easily readable if we do)
+     boolean severalSingles = startsSingle
+             && text.indexOf(closeQuote, 1) < text.lastIndexOf(closeQuote);
+     if (severalSingles) {
+         result.add(para);
+         return result;
+     }
+     boolean severalDoubles = startsDouble
+             && text.indexOf(closeDoubleQuote, 1) < text
+                     .lastIndexOf(closeDoubleQuote);
+     if (severalDoubles) {
+         result.add(para);
+         return result;
+     }
+
+     if (!startsSingle && !startsDouble) {
+         // Quote without quote characters: add them
+         text = openDoubleQuote + text + closeDoubleQuote;
+         result.add(new Paragraph(ParagraphType.QUOTE, text, para
+                 .getWords()));
+         return result;
+     }
+
+     char open = startsSingle ? openQuote : openDoubleQuote;
+     char close = startsSingle ? closeQuote : closeDoubleQuote;
+
+     // Look for the first dot that is not inside the quote characters
+     int dotPos = -1;
+     boolean inside = false;
+     for (int pos = 0; pos < text.length(); pos++) {
+         char c = text.charAt(pos);
+         if (c == open) {
+             inside = true;
+         } else if (c == close) {
+             inside = false;
+         } else if (c == '.' && !inside) {
+             dotPos = pos;
+             break;
+         }
+     }
+
+     if (dotPos < 0) {
+         result.add(para);
+         return result;
+     }
+
+     String rest = text.substring(dotPos + 1).trim();
+     String head = text.substring(0, dotPos + 1).trim();
+
+     long words = 1;
+     for (int pos = 0; pos < head.length(); pos++) {
+         if (head.charAt(pos) == ' ') {
+             words++;
+         }
+     }
+
+     result.add(new Paragraph(ParagraphType.QUOTE, head, words));
+     if (!rest.isEmpty()) {
+         result.addAll(requotify(processPara(rest)));
+     }
+
+     return result;
+ }
+
+ /**
+  * Process a {@link Paragraph} from a raw line of text.
+  * <p>
+  * Will also fix quotes and HTML encoding if needed: the HTML is removed
+  * (in HTML mode), all kinds of spaces are converted to a normal space,
+  * runs of 2 dashes or more become an mdash and the many possible quote
+  * characters are replaced by the configured ones.
+  * <p>
+  * The type of the paragraph is deduced from its content: BLANK if nothing
+  * but spaces and ASCII quotes were found, BREAK if it only contains
+  * separator-like characters, QUOTE if it starts with a quote or a dash,
+  * NORMAL otherwise.
+  *
+  * @param line
+  *            the raw line
+  *
+  * @return the processed {@link Paragraph}
+  */
+ protected Paragraph processPara(String line) {
+     line = ifUnhtml(line).trim();
+
+     // space: only "blank" characters so far
+     // brk: only "blank" or separator-like characters so far
+     boolean space = true;
+     boolean brk = true;
+     boolean quote = false;
+     // an ASCII single quote was seen in the middle of the text: we need
+     // the next character to know if it is an apostrophe or a closing quote
+     boolean tentativeCloseQuote = false;
+     char prev = '\0';
+     int dashCount = 0;
+     long words = 1;
+
+     StringBuilder builder = new StringBuilder();
+     for (char car : line.toCharArray()) {
+         // Dashes are buffered and only written once the run is over
+         // NOTE(review): a run of dashes at the very end of the line is
+         // never written -- confirm this is intended
+         if (car != '-') {
+             if (dashCount > 0) {
+                 // dash, ndash and mdash: - – —
+                 // currently: always use mdash
+                 builder.append(dashCount == 1 ? '-' : '—');
+             }
+             dashCount = 0;
+         }
+
+         if (tentativeCloseQuote) {
+             tentativeCloseQuote = false;
+             if (Character.isLetterOrDigit(car)) {
+                 builder.append("'");
+             } else {
+                 // handle double-single quotes as double quotes
+                 if (prev == car) {
+                     builder.append(closeDoubleQuote);
+                     continue;
+                 }
+
+                 builder.append(closeQuote);
+             }
+         }
+
+         switch (car) {
+         // unbreakable space; written as an escape so that it cannot be
+         // confused with (or silently turned into) the normal space below
+         case '\u00A0':
+         case ' ':
+         case '\t':
+         case '\n': // just in case
+         case '\r': // just in case
+             if (builder.length() > 0
+                     && builder.charAt(builder.length() - 1) != ' ') {
+                 words++;
+             }
+             builder.append(' ');
+             break;
+
+         case '\'':
+             if (space || (brk && quote)) {
+                 quote = true;
+                 // handle double-single quotes as double quotes
+                 if (prev == car) {
+                     builder.deleteCharAt(builder.length() - 1);
+                     builder.append(openDoubleQuote);
+                 } else {
+                     builder.append(openQuote);
+                 }
+             } else if (prev == ' ' || prev == car) {
+                 // handle double-single quotes as double quotes
+                 if (prev == car) {
+                     builder.deleteCharAt(builder.length() - 1);
+                     builder.append(openDoubleQuote);
+                 } else {
+                     builder.append(openQuote);
+                 }
+             } else {
+                 // it is a quote ("I'm off") or a 'quote' ("This
+                 // 'good' restaurant"...)
+                 tentativeCloseQuote = true;
+             }
+             break;
+
+         case '"':
+             if (space || (brk && quote)) {
+                 quote = true;
+                 builder.append(openDoubleQuote);
+             } else if (prev == ' ') {
+                 builder.append(openDoubleQuote);
+             } else {
+                 builder.append(closeDoubleQuote);
+             }
+             break;
+
+         case '-':
+             if (space) {
+                 // a leading dash introduces a dialogue line
+                 quote = true;
+             } else {
+                 dashCount++;
+             }
+             space = false;
+             break;
+
+         // separator-like characters: not blank, but not text either
+         case '*':
+         case '~':
+         case '/':
+         case '\\':
+         case '<':
+         case '>':
+         case '=':
+         case '+':
+         case '_':
+         case '–':
+         case '—':
+             space = false;
+             builder.append(car);
+             break;
+
+         // opening single quotes
+         case '‘':
+         case '`':
+         case '‹':
+         case '﹁':
+         case '〈':
+         case '「':
+             if (space || (brk && quote)) {
+                 quote = true;
+                 builder.append(openQuote);
+             } else {
+                 // handle double-single quotes as double quotes
+                 if (prev == car) {
+                     builder.deleteCharAt(builder.length() - 1);
+                     builder.append(openDoubleQuote);
+                 } else {
+                     builder.append(openQuote);
+                 }
+             }
+             space = false;
+             brk = false;
+             break;
+
+         // closing single quotes
+         case '’':
+         case '›':
+         case '﹂':
+         case '〉':
+         case '」':
+             space = false;
+             brk = false;
+             // handle double-single quotes as double quotes
+             if (prev == car) {
+                 builder.deleteCharAt(builder.length() - 1);
+                 builder.append(closeDoubleQuote);
+             } else {
+                 builder.append(closeQuote);
+             }
+             break;
+
+         // opening double quotes
+         case '«':
+         case '“':
+         case '﹃':
+         case '《':
+         case '『':
+             if (space || (brk && quote)) {
+                 quote = true;
+                 builder.append(openDoubleQuote);
+             } else {
+                 builder.append(openDoubleQuote);
+             }
+             space = false;
+             brk = false;
+             break;
+
+         // closing double quotes
+         case '»':
+         case '”':
+         case '﹄':
+         case '》':
+         case '』':
+             space = false;
+             brk = false;
+             builder.append(closeDoubleQuote);
+             break;
+
+         default:
+             space = false;
+             brk = false;
+             builder.append(car);
+             break;
+         }
+
+         prev = car;
+     }
+
+     // The line ended on an ASCII single quote: it was a closing quote
+     if (tentativeCloseQuote) {
+         tentativeCloseQuote = false;
+         builder.append(closeQuote);
+     }
+
+     line = builder.toString().trim();
+
+     ParagraphType type = ParagraphType.NORMAL;
+     if (space) {
+         type = ParagraphType.BLANK;
+     } else if (brk) {
+         type = ParagraphType.BREAK;
+     } else if (quote) {
+         type = ParagraphType.QUOTE;
+     }
+
+     return new Paragraph(type, line, words);
+ }
+
+ /**
+  * Strip the HTML from the input, but <b>only if</b>
+  * {@link BasicSupport_Deprecated#isHtml()} is true.
+  *
+  * @param input
+  *            the input (can be NULL)
+  *
+  * @return the input without HTML in HTML mode, the input itself
+  *         otherwise
+  */
+ private String ifUnhtml(String input) {
+     if (!isHtml() || input == null) {
+         return input;
+     }
+
+     return StringUtils.unhtml(input);
+ }
+
+ /**
+  * Rewind the given {@link InputStream} and return it.
+  * <p>
+  * A NULL stream is accepted, and a stream that cannot be reset is
+  * returned as is.
+  *
+  * @param in
+  *            the {@link InputStream} to reset
+  *
+  * @return the same {@link InputStream} after reset
+  */
+ static protected InputStream reset(InputStream in) {
+     if (in == null) {
+         return in;
+     }
+
+     try {
+         in.reset();
+     } catch (IOException e) {
+         // best effort: the stream is returned in its current state
+     }
+
+     return in;
+ }
+
+ /**
+ * Return the first line from the given input which correspond to the given
+ * selectors.
+ *
+ * @param in
+ * the input
+ * @param needle
+ * a string that must be found inside the target line (also
+ * supports "^" at start to say "only if it starts with" the
+ * needle)
+ * @param relativeLine
+ * the line to return based upon the target line position (-1 =
+ * the line before, 0 = the target line...)
+ *
+ * @return the line
+ */
+ static protected String getLine(InputStream in, String needle,
+ int relativeLine) {
+ return getLine(in, needle, relativeLine, true);
+ }
+
+ /**
+  * Return a line from the given input which correspond to the given
+  * selectors.
+  * <p>
+  * The input is rewound, then read line by line until the target line is
+  * found. The line to return is the one at <tt>relativeLine</tt> lines from
+  * the target line; if it does not exist (before the first line or after
+  * the last one), NULL is returned.
+  *
+  * @param in
+  *            the input (NULL is accepted and yields NULL)
+  * @param needle
+  *            a string that must be found inside the target line (also
+  *            supports "^" at start to say "only if it starts with" the
+  *            needle)
+  * @param relativeLine
+  *            the line to return based upon the target line position (-1 =
+  *            the line before, 0 = the target line...)
+  * @param first
+  *            TRUE to stop reading as soon as the result is known; FALSE
+  *            to always spend the whole input. NOTE(review): the target
+  *            line is always the first match, so the same line is returned
+  *            in both cases -- confirm whether "last match" was intended
+  *
+  * @return the line, or NULL if not found
+  */
+ static protected String getLine(InputStream in, String needle,
+         int relativeLine, boolean first) {
+     // A Scanner cannot be created on a NULL input
+     if (in == null) {
+         return null;
+     }
+
+     String rep = null;
+
+     reset(in);
+
+     boolean anchored = needle.startsWith("^");
+     String search = anchored ? needle.substring(1) : needle;
+
+     List<String> lines = new ArrayList<String>();
+     @SuppressWarnings("resource")
+     Scanner scan = new Scanner(in, "UTF-8");
+     int index = -1;
+     scan.useDelimiter("\\n");
+     while (scan.hasNext()) {
+         String line = scan.next();
+         lines.add(line);
+
+         if (index == -1) {
+             boolean match = anchored ? line.startsWith(search) : line
+                     .contains(search);
+             if (match) {
+                 index = lines.size() - 1;
+             }
+         }
+
+         if (index >= 0) {
+             // The target can be before the first line (negative) or
+             // not read yet (we then need to read more lines)
+             int target = index + relativeLine;
+             if (target >= 0 && target < lines.size()) {
+                 rep = lines.get(target);
+                 if (first) {
+                     break;
+                 }
+             }
+         }
+     }
+
+     return rep;
+ }
+
+ /**
+  * Find the first line of the input that matches the key, then return the
+  * text found on that line between the key and the endKey (an optional
+  * subKey can be passed, in this case we will look for the key first, then
+  * take the text between the subKey and the endKey).
+  * <p>
+  * Will only match the first line with the given key if more than one are
+  * possible. Which also means that if the subKey or endKey is not found on
+  * that line, NULL will be returned.
+  *
+  * @param in
+  *            the input
+  * @param key
+  *            the key to match (also supports "^" at start to say
+  *            "only if it starts with" the key)
+  * @param subKey
+  *            the sub key or NULL if none
+  * @param endKey
+  *            the end key or NULL for "up to the end"
+  * @return the text or NULL if not found
+  */
+ static protected String getKeyLine(InputStream in, String key,
+         String subKey, String endKey) {
+     String keyLine = getLine(in, key, 0);
+     return getKeyText(keyLine, key, subKey, endKey);
+ }
+
+ /**
+  * Return the text between the key and the endKey (and optional subKey can
+  * be passed, in this case we will look for the key first, then take the
+  * text between the subKey and the endKey).
+  * <p>
+  * When the key starts with "^", the input must start with the rest of the
+  * key; as a fallback, the key is also looked for literally ("^" included)
+  * anywhere in the input.
+  *
+  * @param in
+  *            the input (NULL is accepted and yields NULL)
+  * @param key
+  *            the key to match (also supports "^" at start to say
+  *            "only if it starts with" the key)
+  * @param subKey
+  *            the sub key or NULL (or empty) if none
+  * @param endKey
+  *            the end key or NULL for "up to the end"
+  * @return the text or NULL if not found (i.e., if the key, the sub key or
+  *         the end key is missing)
+  */
+ static protected String getKeyText(String in, String key, String subKey,
+         String endKey) {
+     if (in == null) {
+         return null;
+     }
+
+     // Locate the end of the key
+     int start;
+     if (key.length() > 1 && key.startsWith("^")
+             && in.startsWith(key.substring(1))) {
+         start = key.length() - 1;
+     } else if (in.contains(key)) {
+         start = in.indexOf(key) + key.length();
+     } else {
+         return null;
+     }
+
+     String line = in.substring(start);
+
+     if (subKey != null && !subKey.isEmpty()) {
+         int subPos = line.indexOf(subKey);
+         if (subPos < 0) {
+             return null;
+         }
+         line = line.substring(subPos + subKey.length());
+     }
+
+     // No end key means "up to the end", not "no result"
+     if (endKey != null) {
+         int endPos = line.indexOf(endKey);
+         if (endPos < 0) {
+             return null;
+         }
+         line = line.substring(0, endPos);
+     }
+
+     return line;
+ }
+
+ /**
+  * Return the text between the key and the endKey, only looking at what
+  * comes after the given sub-keys (which must all be found, one after the
+  * other, in the input).
+  *
+  * @param in
+  *            the input
+  * @param key
+  *            the key to match
+  * @param endKey
+  *            the end key or NULL for "up to the end"
+  * @param afters
+  *            the sub-keys to find before checking for key/endKey
+  *
+  * @return the text or NULL if not found
+  */
+ static protected String getKeyTextAfter(String in, String key,
+         String endKey, String... afters) {
+     String remaining = in;
+
+     boolean hasInput = in != null && !in.isEmpty();
+     if (hasInput) {
+         int pos = indexOfAfter(in, 0, afters);
+         if (pos < 0) {
+             return null;
+         }
+
+         remaining = in.substring(pos);
+     }
+
+     return getKeyText(remaining, key, null, endKey);
+ }
+
+ /**
+  * Return the first index after all the given "afters" have been found, in
+  * order, in the {@link String}, or -1 if it was not possible.
+  * <p>
+  * Empty sub-keys are ignored.
+  *
+  * @param in
+  *            the input
+  * @param startAt
+  *            start at this position in the string
+  * @param afters
+  *            the sub-keys to find, one after the other (can be NULL)
+  *
+  * @return the index just after the last sub-key (<tt>startAt</tt> if
+  *         there are no sub-keys), or -1 if a sub-key was not found or if
+  *         the input is NULL or empty
+  */
+ static protected int indexOfAfter(String in, int startAt, String... afters) {
+     if (in == null || in.isEmpty()) {
+         return -1;
+     }
+
+     int pos = startAt;
+     if (afters == null) {
+         return pos;
+     }
+
+     for (String subKey : afters) {
+         if (pos < 0) {
+             break;
+         }
+
+         if (subKey.isEmpty()) {
+             continue;
+         }
+
+         int found = in.indexOf(subKey, pos);
+         pos = found < 0 ? found : found + subKey.length();
+     }
+
+     return pos;
+ }
+}
*
* @author niki
*/
-class E621 extends BasicSupport {
+class E621 extends BasicSupport_Deprecated {
@Override
public String getSourceName() {
return "e621.net";
*
* @author niki
*/
-class EHentai extends BasicSupport {
+class EHentai extends BasicSupport_Deprecated {
@Override
public String getSourceName() {
return "e-hentai.org";
}
@Override
- protected void close() throws IOException {
+ protected void close() {
if (tmp != null && tmp.exists()) {
if (!tmp.delete()) {
tmp.deleteOnExit();
tmp = null;
if (fakeIn != null) {
- fakeIn.close();
+ try {
+ fakeIn.close();
+ } catch (Exception e) {
+ Instance.getTraceHandler().error(e);
+ }
}
super.close();
*
* @author niki
*/
-class Fanfiction extends BasicSupport {
+class Fanfiction extends BasicSupport_Deprecated {
@Override
protected boolean isHtml() {
return true;
}
}
- return fixAuthor(author);
+ return BasicSupportHelper.fixAuthor(author);
}
private String getDate(InputStream in) {
*
* @author niki
*/
-class Fimfiction extends BasicSupport {
+class Fimfiction extends BasicSupport_Deprecated {
@Override
protected boolean isHtml() {
return true;
*
* @author niki
*/
-class FimfictionApi extends BasicSupport {
+class FimfictionApi extends BasicSupport_Deprecated {
private String oauth;
private String storyId;
private String json;
import java.net.URISyntaxException;
import java.net.URL;
+import be.nikiroo.fanfix.Instance;
+
/**
* Support class for HTML files created with this program (as we need some
* metadata available in those we create).
}
@Override
- public URL getCanonicalUrl(URL source) throws IOException {
+ public URL getCanonicalUrl(URL source) {
if (source.toString().endsWith(File.separator + "index.html")) {
try {
File fakeFile = new File(source.toURI()); // "story/index.html"
fakeFile = new File(fakeFile.getParent()); // "story"
fakeFile = new File(fakeFile, fakeFile.getName()); // "story/story"
return fakeFile.toURI().toURL();
- } catch (URISyntaxException e) {
- throw new IOException(
- "file not supported (maybe not created with this program or corrupt)",
- e);
+ } catch (Exception e) {
+ Instance.getTraceHandler().error(
+ new IOException("Cannot find the right URL for "
+ + source, e));
}
}
if (withCover) {
String infoTag = getInfoTag(in, "COVER");
if (infoTag != null && !infoTag.trim().isEmpty()) {
- meta.setCover(BasicSupport.getImage(null, sourceInfoFile,
+ meta.setCover(BasicSupportHelper.getImage(null, sourceInfoFile,
infoTag));
}
// Second chance: try to check for a cover next to the info file
+ Instance.getConfig()
.getString(Config.IMAGE_FORMAT_COVER)
.toLowerCase();
- meta.setCover(BasicSupport.getImage(null, sourceInfoFile,
- info + ext));
+ meta.setCover(BasicSupportHelper.getImage(null,
+ sourceInfoFile, info + ext));
}
}
}
meta.setFakeCover(Boolean.parseBoolean(getInfoTag(in, "FAKE_COVER")));
if (withCover && meta.getCover() == null) {
- meta.setCover(BasicSupport.getDefaultCover(meta.getSubject()));
+ meta.setCover(BasicSupportHelper.getDefaultCover(meta.getSubject()));
}
return meta;
if (in != null) {
in.reset();
- String value = BasicSupport.getLine(in, key, 0);
+ String value = BasicSupport_Deprecated.getLine(in, key, 0);
if (value != null && !value.isEmpty()) {
value = value.trim().substring(key.length() - 1).trim();
if (value.startsWith("'") && value.endsWith("'")
import be.nikiroo.utils.Progress;
import be.nikiroo.utils.StringUtils;
-class MangaFox extends BasicSupport {
+class MangaFox extends BasicSupport_Deprecated {
@Override
protected boolean isHtml() {
return true;
@Override
protected boolean supports(URL url) {
+ // Broken code (see MangaFoxNew): every URL is refused for now; if (true) is used so the old host check below still compiles
+ if (true)
+ return false;
return "mangafox.me".equals(url.getHost())
|| "www.mangafox.me".equals(url.getHost());
}
--- /dev/null
+package be.nikiroo.fanfix.supported;
+
+import java.util.Locale;
+
+import be.nikiroo.fanfix.Instance;
+import be.nikiroo.fanfix.bundles.StringId;
+
+/**
+ * The supported input types for which we can get a {@link BasicSupport} object.
+ * <p>
+ * All the case conversions done on the type names use {@link Locale#ROOT}, so
+ * the result does not depend upon the default locale of the JVM (with a
+ * Turkish locale, for instance, "fimfiction" would otherwise not be converted
+ * to "FIMFICTION").
+ *
+ * @author niki
+ */
+public enum SupportType {
+	/** EPUB files created with this program */
+	EPUB,
+	/** Pure text file with some rules */
+	TEXT,
+	/** TEXT but with associated .info file */
+	INFO_TEXT,
+	/** My Little Pony fanfictions */
+	FIMFICTION,
+	/** Fanfictions from a lot of different universes */
+	FANFICTION,
+	/** Website with lots of Mangas */
+	MANGAFOX,
+	/** Furry website with comics support */
+	E621,
+	/** Furry website with stories */
+	YIFFSTAR,
+	/** Comics and images groups, mostly but not only NSFW */
+	E_HENTAI,
+	/** CBZ files */
+	CBZ,
+	/** HTML files */
+	HTML;
+
+	/**
+	 * A description of this support type (more information than the
+	 * {@link BasicSupport#getSourceName()}).
+	 * <p>
+	 * The description keyed on the name of this type is tried first, then the
+	 * one keyed on the type itself.
+	 *
+	 * @return the description
+	 */
+	public String getDesc() {
+		String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
+				this.name());
+
+		if (desc == null) {
+			desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
+		}
+
+		return desc;
+	}
+
+	/**
+	 * The name of this support type (a short version).
+	 *
+	 * @return the name, or NULL if no {@link BasicSupport} is available for
+	 *         this type
+	 */
+	public String getSourceName() {
+		BasicSupport support = BasicSupport.getSupport(this, null);
+		if (support != null) {
+			return support.getSourceName();
+		}
+
+		return null;
+	}
+
+	/**
+	 * The name of this type, in lower case (locale-independent).
+	 *
+	 * @return the lower case name
+	 */
+	@Override
+	public String toString() {
+		// Locale.ROOT: the default locale must not alter the constant names
+		return super.toString().toLowerCase(Locale.ROOT);
+	}
+
+	/**
+	 * Call {@link SupportType#valueOf(String)} after conversion to upper case
+	 * (locale-independent).
+	 *
+	 * @param typeName
+	 *            the possible type name
+	 *
+	 * @return the type (never NULL)
+	 *
+	 * @throws NullPointerException
+	 *             if the type name is NULL
+	 * @throws IllegalArgumentException
+	 *             if the type name does not match any known type
+	 */
+	public static SupportType valueOfUC(String typeName) {
+		return SupportType.valueOf(typeName == null ? null : typeName
+				.toUpperCase(Locale.ROOT));
+	}
+
+	/**
+	 * Call {@link SupportType#valueOf(String)} after conversion to upper case
+	 * but return NULL for NULL instead of raising exception.
+	 *
+	 * @param typeName
+	 *            the possible type name
+	 *
+	 * @return NULL (for a NULL type name) or the type
+	 *
+	 * @throws IllegalArgumentException
+	 *             if the (non-NULL) type name does not match any known type
+	 */
+	public static SupportType valueOfNullOkUC(String typeName) {
+		if (typeName == null) {
+			return null;
+		}
+
+		return SupportType.valueOfUC(typeName);
+	}
+
+	/**
+	 * Call {@link SupportType#valueOf(String)} after conversion to upper case
+	 * but return NULL in case of error instead of raising an exception.
+	 *
+	 * @param typeName
+	 *            the possible type name
+	 *
+	 * @return NULL (for a NULL or unknown type name) or the type
+	 */
+	public static SupportType valueOfAllOkUC(String typeName) {
+		try {
+			return SupportType.valueOfUC(typeName);
+		} catch (Exception e) {
+			// Deliberate: any failure to resolve the name means "no type"
+			return null;
+		}
+	}
+}
\ No newline at end of file
*
* @author niki
*/
-class Text extends BasicSupport {
+class Text extends BasicSupport_Deprecated {
@Override
protected boolean isHtml() {
return false;
author = authorDate.substring(0, pos);
}
- return fixAuthor(author);
+ return BasicSupportHelper.fixAuthor(author);
}
private String getDate(InputStream in) {
*
* @author niki
*/
-class YiffStar extends BasicSupport {
+class YiffStar extends BasicSupport_Deprecated {
@Override
public String getSourceName() {
}
@Override
- public URL getCanonicalUrl(URL source) throws IOException {
- if (source.getPath().startsWith("/view")) {
- source = new URL(source.toString() + "/guest");
- InputStream in = Instance.getCache().open(source, this, false);
- String line = getLine(in, "/browse/folder/", 0);
- if (line != null) {
- String[] tab = line.split("\"");
- if (tab.length > 1) {
- String groupUrl = source.getProtocol() + "://"
- + source.getHost() + tab[1];
- return guest(groupUrl);
+ public URL getCanonicalUrl(URL source) {
+ try {
+ if (source.getPath().startsWith("/view")) {
+ source = new URL(source.toString() + "/guest");
+ InputStream in = Instance.getCache().open(source, this, false);
+ String line = getLine(in, "/browse/folder/", 0);
+ if (line != null) {
+ String[] tab = line.split("\"");
+ if (tab.length > 1) {
+ String groupUrl = source.getProtocol() + "://"
+ + source.getHost() + tab[1];
+ return guest(groupUrl);
+ }
}
}
+ } catch (Exception e) {
+ Instance.getTraceHandler().error(e);
}
return super.getCanonicalUrl(source);
import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
import be.nikiroo.fanfix.data.Story;
import be.nikiroo.fanfix.supported.BasicSupport;
-import be.nikiroo.fanfix.supported.BasicSupport.SupportType;
+import be.nikiroo.fanfix.supported.BasicSupport_Deprecated;
+import be.nikiroo.fanfix.supported.SupportType;
import be.nikiroo.utils.IOUtils;
import be.nikiroo.utils.Progress;
import be.nikiroo.utils.test.TestCase;
@Override
public void test() throws Exception {
- BasicSupport support = BasicSupport
- .getSupport(SupportType.TEXT);
+ BasicSupport support = BasicSupport.getSupport(
+ SupportType.TEXT, tmp.toURI().toURL());
- Story story = support
- .process(tmp.toURI().toURL(), null);
+ Story story = support.process(null);
assertEquals(2, story.getChapters().size());
assertEquals(1, story.getChapters().get(1)
@Override
public void test() throws Exception {
- BasicSupport support = BasicSupport
- .getSupport(SupportType.TEXT);
+ BasicSupport support = BasicSupport.getSupport(
+ SupportType.TEXT, tmp.toURI().toURL());
- Story story = support
- .process(tmp.toURI().toURL(), null);
+ Story story = support.process(null);
assertEquals(2, story.getChapters().size());
assertEquals(1, story.getChapters().get(1)
});
}
- private class BasicSupportEmpty extends BasicSupport {
+ private class BasicSupportEmpty extends BasicSupport_Deprecated {
@Override
protected String getSourceName() {
return null;