X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Ffanfix%2Fsupported%2FBasicSupport.java;h=0ee12fec463cef8b1c6dd31093ad2401c9ef4587;hb=e8eeea0a321493d270c35f594a8bf392cc95f4df;hp=74f11156e82c265a0eee1ddbd09b7c417a98ed12;hpb=08fe2e33007063e30fe22dc1d290f8afaa18eb1d;p=nikiroo-utils.git
diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport.java b/src/be/nikiroo/fanfix/supported/BasicSupport.java
index 74f1115..0ee12fe 100644
--- a/src/be/nikiroo/fanfix/supported/BasicSupport.java
+++ b/src/be/nikiroo/fanfix/supported/BasicSupport.java
@@ -1,13 +1,16 @@
package be.nikiroo.fanfix.supported;
+import java.awt.image.BufferedImage;
+import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
+import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
-import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
+import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@@ -20,8 +23,10 @@ import be.nikiroo.fanfix.bundles.StringId;
import be.nikiroo.fanfix.data.Chapter;
import be.nikiroo.fanfix.data.MetaData;
import be.nikiroo.fanfix.data.Paragraph;
-import be.nikiroo.fanfix.data.Story;
import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
+import be.nikiroo.fanfix.data.Story;
+import be.nikiroo.utils.IOUtils;
+import be.nikiroo.utils.Progress;
import be.nikiroo.utils.StringUtils;
/**
@@ -55,8 +60,12 @@ public abstract class BasicSupport {
MANGAFOX,
/** Furry website with comics support */
E621,
+ /** Furry website with stories */
+ YIFFSTAR,
/** CBZ files */
- CBZ;
+ CBZ,
+ /** HTML files */
+ HTML;
/**
* A description of this support type (more information than the
@@ -142,19 +151,18 @@ public abstract class BasicSupport {
}
}
- /** Only used by {@link BasicSupport#getInput()} just so it is always reset. */
private InputStream in;
private SupportType type;
- private URL currentReferer; // with on 'r', as in 'HTTP'...
+ private URL currentReferer; // with only one 'r', as in 'HTTP'...
// quote chars
- private char openQuote = Instance.getTrans().getChar(
+ private char openQuote = Instance.getTrans().getCharacter(
StringId.OPEN_SINGLE_QUOTE);
- private char closeQuote = Instance.getTrans().getChar(
+ private char closeQuote = Instance.getTrans().getCharacter(
StringId.CLOSE_SINGLE_QUOTE);
- private char openDoubleQuote = Instance.getTrans().getChar(
+ private char openDoubleQuote = Instance.getTrans().getCharacter(
StringId.OPEN_DOUBLE_QUOTE);
- private char closeDoubleQuote = Instance.getTrans().getChar(
+ private char closeDoubleQuote = Instance.getTrans().getCharacter(
StringId.CLOSE_DOUBLE_QUOTE);
/**
@@ -182,70 +190,7 @@ public abstract class BasicSupport {
*/
protected abstract boolean isHtml();
- /**
- * Return the story title.
- *
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
- *
- * @return the title
- *
- * @throws IOException
- * in case of I/O error
- */
- protected abstract String getTitle(URL source, InputStream in)
- throws IOException;
-
- /**
- * Return the story author.
- *
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
- *
- * @return the author
- *
- * @throws IOException
- * in case of I/O error
- */
- protected abstract String getAuthor(URL source, InputStream in)
- throws IOException;
-
- /**
- * Return the story publication date.
- *
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
- *
- * @return the date
- *
- * @throws IOException
- * in case of I/O error
- */
- protected abstract String getDate(URL source, InputStream in)
- throws IOException;
-
- /**
- * Return the subject of the story (for instance, if it is a fanfiction,
- * what is the original work; if it is a technical text, what is the
- * technical subject...).
- *
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
- *
- * @return the subject
- *
- * @throws IOException
- * in case of I/O error
- */
- protected abstract String getSubject(URL source, InputStream in)
+ protected abstract MetaData getMeta(URL source, InputStream in)
throws IOException;
/**
@@ -264,24 +209,6 @@ public abstract class BasicSupport {
protected abstract String getDesc(URL source, InputStream in)
throws IOException;
- /**
- * Return the story cover resource if any, or NULL if none.
- *
- * The default cover should not be checked for here.
- *
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
- *
- * @return the cover or NULL
- *
- * @throws IOException
- * in case of I/O error
- */
- protected abstract URL getCover(URL source, InputStream in)
- throws IOException;
-
/**
* Return the list of chapters (name and resource).
*
@@ -289,6 +216,8 @@ public abstract class BasicSupport {
* the source of the story
* @param in
* the input (the main resource)
+ * @param pg
+ * the optional progress reporter
*
* @return the chapters
*
@@ -296,7 +225,7 @@ public abstract class BasicSupport {
* in case of I/O error
*/
protected abstract List> getChapters(URL source,
- InputStream in) throws IOException;
+ InputStream in, Progress pg) throws IOException;
/**
* Return the content of the chapter (possibly HTML encoded, if
@@ -308,6 +237,8 @@ public abstract class BasicSupport {
* the input (the main resource)
* @param number
* the chapter number
+ * @param pg
+ * the optional progress reporter
*
* @return the content
*
@@ -315,16 +246,16 @@ public abstract class BasicSupport {
* in case of I/O error
*/
protected abstract String getChapterContent(URL source, InputStream in,
- int number) throws IOException;
+ int number, Progress pg) throws IOException;
/**
- * Check if this {@link BasicSupport} is mainly catered to image files.
+ * Log into the support (can be a no-op depending upon the support).
*
- * @return TRUE if it is
+ * @throws IOException
+ * in case of I/O error
*/
- public boolean isImageDocument(URL source, InputStream in)
- throws IOException {
- return false;
+ public void login() throws IOException {
+
}
/**
@@ -335,11 +266,29 @@ public abstract class BasicSupport {
* it.
*
* @return the cookies
+ *
+ * @throws IOException
+ * in case of I/O error
*/
- public Map getCookies() {
+ public Map getCookies() throws IOException {
return new HashMap();
}
+ /**
+ * Return the canonical form of the main {@link URL}.
+ *
+ * @param source
+ * the source {@link URL}
+ *
+ * @return the canonical form of this {@link URL}
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ public URL getCanonicalUrl(URL source) throws IOException {
+ return source;
+ }
+
/**
* Process the given story resource into a partially filled {@link Story}
* object containing the name and metadata, except for the description.
@@ -353,7 +302,7 @@ public abstract class BasicSupport {
* in case of I/O error
*/
public Story processMeta(URL url) throws IOException {
- return processMeta(url, true, false);
+ return processMeta(url, true, false, null);
}
/**
@@ -365,46 +314,63 @@ public abstract class BasicSupport {
*
* @param close
* close "this" and "in" when done
+ * @param pg
+ * the optional progress reporter
*
* @return the {@link Story}
*
* @throws IOException
* in case of I/O error
*/
- protected Story processMeta(URL url, boolean close, boolean getDesc)
- throws IOException {
- in = Instance.getCache().open(url, this, false);
+ protected Story processMeta(URL url, boolean close, boolean getDesc,
+ Progress pg) throws IOException {
+ if (pg == null) {
+ pg = new Progress();
+ } else {
+ pg.setMinMax(0, 100);
+ }
+
+ login();
+ pg.setProgress(10);
+
+ url = getCanonicalUrl(url);
+
+ setCurrentReferer(url);
+
+ in = openInput(url);
if (in == null) {
return null;
}
try {
- preprocess(getInput());
+ preprocess(url, getInput());
+ pg.setProgress(30);
Story story = new Story();
- story.setMeta(new MetaData());
- story.getMeta().setTitle(ifUnhtml(getTitle(url, getInput())));
- story.getMeta().setAuthor(
- fixAuthor(ifUnhtml(getAuthor(url, getInput()))));
- story.getMeta().setDate(ifUnhtml(getDate(url, getInput())));
- story.getMeta().setTags(getTags(url, getInput()));
- story.getMeta().setSource(getSourceName());
- story.getMeta().setPublisher(
- ifUnhtml(getPublisher(url, getInput())));
- story.getMeta().setUuid(getUuid(url, getInput()));
- story.getMeta().setLuid(getLuid(url, getInput()));
- story.getMeta().setLang(getLang(url, getInput()));
- story.getMeta().setSubject(ifUnhtml(getSubject(url, getInput())));
- story.getMeta().setImageDocument(isImageDocument(url, getInput()));
+ MetaData meta = getMeta(url, getInput());
+ if (meta.getCreationDate() == null
+ || meta.getCreationDate().isEmpty()) {
+ meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
+ }
+ story.setMeta(meta);
+
+ pg.setProgress(50);
+
+ if (meta != null && meta.getCover() == null) {
+ meta.setCover(getDefaultCover(meta.getSubject()));
+ }
+
+ pg.setProgress(60);
if (getDesc) {
String descChapterName = Instance.getTrans().getString(
StringId.DESCRIPTION);
story.getMeta().setResume(
makeChapter(url, 0, descChapterName,
- getDesc(url, getInput())));
+ getDesc(url, getInput()), null));
}
+ pg.setProgress(100);
return story;
} finally {
if (close) {
@@ -418,6 +384,8 @@ public abstract class BasicSupport {
in.close();
}
}
+
+ setCurrentReferer(null);
}
}
@@ -427,64 +395,96 @@ public abstract class BasicSupport {
*
* @param url
* the story resource
+ * @param pg
+ * the optional progress reporter
*
* @return the {@link Story}
*
* @throws IOException
* in case of I/O error
*/
- public Story process(URL url) throws IOException {
- setCurrentReferer(url);
+ public Story process(URL url, Progress pg) throws IOException {
+ if (pg == null) {
+ pg = new Progress();
+ } else {
+ pg.setMinMax(0, 100);
+ }
+ url = getCanonicalUrl(url);
+ pg.setProgress(1);
try {
- Story story = processMeta(url, false, true);
+ Progress pgMeta = new Progress();
+ pg.addProgress(pgMeta, 10);
+ Story story = processMeta(url, false, true, pgMeta);
+ if (!pgMeta.isDone()) {
+ pgMeta.setProgress(pgMeta.getMax()); // 10%
+ }
+
if (story == null) {
+ pg.setProgress(90);
return null;
}
- story.setChapters(new ArrayList());
+ pg.setName("Retrieving " + story.getMeta().getTitle());
- URL cover = getCover(url, getInput());
- if (cover == null) {
- String subject = story.getMeta() == null ? null : story
- .getMeta().getSubject();
- if (subject != null && !subject.isEmpty()
- && Instance.getCoverDir() != null) {
- File fileCover = new File(Instance.getCoverDir(), subject);
- cover = getImage(fileCover.toURI().toURL(), subject);
- }
- }
+ setCurrentReferer(url);
- if (cover != null) {
- InputStream coverIn = null;
- try {
- coverIn = Instance.getCache().open(cover, this, true);
- story.getMeta().setCover(StringUtils.toImage(coverIn));
- } catch (IOException e) {
- Instance.syserr(new IOException(Instance.getTrans()
- .getString(StringId.ERR_BS_NO_COVER, cover), e));
- } finally {
- if (coverIn != null)
- coverIn.close();
- }
+ Progress pgGetChapters = new Progress();
+ pg.addProgress(pgGetChapters, 10);
+ story.setChapters(new ArrayList());
+ List> chapters = getChapters(url, getInput(),
+ pgGetChapters);
+ if (!pgGetChapters.isDone()) {
+ pgGetChapters.setProgress(pgGetChapters.getMax()); // 20%
}
- List> chapters = getChapters(url, getInput());
- int i = 1;
if (chapters != null) {
+ Progress pgChaps = new Progress("Extracting chapters", 0,
+ chapters.size() * 300);
+ pg.addProgress(pgChaps, 80);
+
+ long words = 0;
+ int i = 1;
for (Entry chap : chapters) {
+ pgChaps.setName("Extracting chapter " + i);
setCurrentReferer(chap.getValue());
InputStream chapIn = Instance.getCache().open(
chap.getValue(), this, true);
+ pgChaps.setProgress(i * 100);
try {
- story.getChapters().add(
- makeChapter(url, i, chap.getKey(),
- getChapterContent(url, chapIn, i)));
+ Progress pgGetChapterContent = new Progress();
+ Progress pgMakeChapter = new Progress();
+ pgChaps.addProgress(pgGetChapterContent, 100);
+ pgChaps.addProgress(pgMakeChapter, 100);
+
+ String content = getChapterContent(url, chapIn, i,
+ pgGetChapterContent);
+ if (!pgGetChapterContent.isDone()) {
+ pgGetChapterContent.setProgress(pgGetChapterContent
+ .getMax());
+ }
+
+ Chapter cc = makeChapter(url, i, chap.getKey(),
+ content, pgMakeChapter);
+ if (!pgMakeChapter.isDone()) {
+ pgMakeChapter.setProgress(pgMakeChapter.getMax());
+ }
+
+ words += cc.getWords();
+ story.getChapters().add(cc);
+ if (story.getMeta() != null) {
+ story.getMeta().setWords(words);
+ }
} finally {
chapIn.close();
}
+
i++;
}
+
+ pgChaps.setName("Extracting chapters");
+ } else {
+ pg.setProgress(80);
}
return story;
@@ -500,12 +500,12 @@ public abstract class BasicSupport {
in.close();
}
- currentReferer = null;
+ setCurrentReferer(null);
}
}
/**
- * The support type.$
+ * The support type.
*
* @return the type
*/
@@ -548,177 +548,17 @@ public abstract class BasicSupport {
}
/**
- * Return the story publisher (by default,
- * {@link BasicSupport#getSourceName()}).
- *
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
- *
- * @return the publisher
- *
- * @throws IOException
- * in case of I/O error
- */
- protected String getPublisher(URL source, InputStream in)
- throws IOException {
- return getSourceName();
- }
-
- /**
- * Return the story UUID, a unique value representing the story (it is often
- * an URL).
- *
- * By default, this is the {@link URL} of the resource.
- *
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
- *
- * @return the uuid
- *
- * @throws IOException
- * in case of I/O error
- */
- protected String getUuid(URL source, InputStream in) throws IOException {
- return source.toString();
- }
-
- /**
- * Return the story Library UID, a unique value representing the story (it
- * is often a number) in the local library.
- *
- * By default, this is empty.
- *
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
- *
- * @return the id
- *
- * @throws IOException
- * in case of I/O error
- */
- protected String getLuid(URL source, InputStream in) throws IOException {
- return "";
- }
-
- /**
- * Return the 2-letter language code of this story.
- *
- * By default, this is 'EN'.
- *
- * @param source
- * the source of the story
- * @param in
- * the input (the main resource)
- *
- * @return the language
- *
- * @throws IOException
- * in case of I/O error
- */
- protected String getLang(URL source, InputStream in) throws IOException {
- return "EN";
- }
-
- /**
- * Return the list of tags for this story.
+ * Prepare the support if needed before processing.
*
* @param source
* the source of the story
* @param in
* the input (the main resource)
*
- * @return the tags
- *
- * @throws IOException
- * in case of I/O error
- */
- protected List getTags(URL source, InputStream in)
- throws IOException {
- return new ArrayList();
- }
-
- /**
- * Return the first line from the given input which correspond to the given
- * selectors.
- *
- * Do not reset the input, which will be pointing at the line just after the
- * result (input will be spent if no result is found).
- *
- * @param in
- * the input
- * @param needle
- * a string that must be found inside the target line
- * @param relativeLine
- * the line to return based upon the target line position (-1 =
- * the line before, 0 = the target line...)
- *
- * @return the line
- */
- protected String getLine(InputStream in, String needle, int relativeLine) {
- return getLine(in, needle, relativeLine, true);
- }
-
- /**
- * Return a line from the given input which correspond to the given
- * selectors.
- *
- * Do not reset the input, which will be pointing at the line just after the
- * result (input will be spent if no result is found) when first is TRUE,
- * and will always be spent if first is FALSE.
- *
- * @param in
- * the input
- * @param needle
- * a string that must be found inside the target line
- * @param relativeLine
- * the line to return based upon the target line position (-1 =
- * the line before, 0 = the target line...)
- * @param first
- * takes the first result (as opposed to the last one, which will
- * also always spend the input)
- *
- * @return the line
- */
- protected String getLine(InputStream in, String needle, int relativeLine,
- boolean first) {
- String rep = null;
-
- List lines = new ArrayList();
- @SuppressWarnings("resource")
- Scanner scan = new Scanner(in, "UTF-8");
- int index = -1;
- scan.useDelimiter("\\n");
- while (scan.hasNext()) {
- lines.add(scan.next());
-
- if (index == -1 && lines.get(lines.size() - 1).contains(needle)) {
- index = lines.size() - 1;
- }
-
- if (index >= 0 && index + relativeLine < lines.size()) {
- rep = lines.get(index + relativeLine);
- if (first) {
- break;
- }
- }
- }
-
- return rep;
- }
-
- /**
- * Prepare the support if needed before processing.
- *
* @throws IOException
* on I/O error
*/
- protected void preprocess(InputStream in) throws IOException {
+ protected void preprocess(URL source, InputStream in) throws IOException {
}
/**
@@ -740,6 +580,8 @@ public abstract class BasicSupport {
* the chapter name
* @param content
* the chapter content
+ * @param pg
+ * the optional progress reporter
*
* @return the {@link Chapter}
*
@@ -747,8 +589,7 @@ public abstract class BasicSupport {
* in case of I/O error
*/
protected Chapter makeChapter(URL source, int number, String name,
- String content) throws IOException {
-
+ String content, Progress pg) throws IOException {
// Chapter name: process it correctly, then remove the possible
// redundant "Chapter x: " in front of it
String chapterName = processPara(name).getContent().trim();
@@ -775,39 +616,89 @@ public abstract class BasicSupport {
Chapter chap = new Chapter(number, chapterName);
- if (content == null) {
- return chap;
+ if (content != null) {
+ List paras = makeParagraphs(source, content, pg);
+ long words = 0;
+ for (Paragraph para : paras) {
+ words += para.getWords();
+ }
+ chap.setParagraphs(paras);
+ chap.setWords(words);
+ }
+
+ return chap;
+
+ }
+
+ /**
+ * Convert the given content into {@link Paragraph}s.
+ *
+ * @param source
+ * the source URL of the story
+ * @param content
+ * the textual content
+ * @param pg
+ * the optional progress reporter
+ *
+ * @return the {@link Paragraph}s
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ protected List makeParagraphs(URL source, String content,
+ Progress pg) throws IOException {
+ if (pg == null) {
+ pg = new Progress();
}
if (isHtml()) {
// Special
processing:
content = content.replaceAll("(
]*>)|(
)|(
)",
- "\n* * *\n");
+ "
* * *
");
}
- InputStream in = new ByteArrayInputStream(
- content.getBytes(StandardCharsets.UTF_8));
- try {
- @SuppressWarnings("resource")
- Scanner scan = new Scanner(in, "UTF-8");
- scan.useDelimiter("(\\n|
)"); // \n for test,
for html
-
- List paras = new ArrayList();
- while (scan.hasNext()) {
- String line = scan.next().trim();
- boolean image = false;
- if (line.startsWith("[") && line.endsWith("]")) {
- URL url = getImage(source,
- line.substring(1, line.length() - 1).trim());
- if (url != null) {
- paras.add(new Paragraph(url));
- image = true;
+ List paras = new ArrayList();
+
+ if (content != null && !content.trim().isEmpty()) {
+ if (isHtml()) {
+ String[] tab = content.split("(|
|
|
)");
+ pg.setMinMax(0, tab.length);
+ int i = 1;
+ for (String line : tab) {
+ if (line.startsWith("[") && line.endsWith("]")) {
+ pg.setName("Extracting image " + i);
+ }
+ paras.add(makeParagraph(source, line.trim()));
+ pg.setProgress(i++);
+ }
+ pg.setName(null);
+ } else {
+ List lines = new ArrayList();
+ BufferedReader buff = null;
+ try {
+ buff = new BufferedReader(
+ new InputStreamReader(new ByteArrayInputStream(
+ content.getBytes("UTF-8")), "UTF-8"));
+ for (String line = buff.readLine(); line != null; line = buff
+ .readLine()) {
+ lines.add(line.trim());
+ }
+ } finally {
+ if (buff != null) {
+ buff.close();
}
}
- if (!image) {
- paras.add(processPara(line));
+ pg.setMinMax(0, lines.size());
+ int i = 0;
+ for (String line : lines) {
+ if (line.startsWith("[") && line.endsWith("]")) {
+ pg.setName("Extracting image " + i);
+ }
+ paras.add(makeParagraph(source, line));
+ pg.setProgress(i++);
}
+ pg.setName(null);
}
// Check quotes for "bad" format
@@ -818,54 +709,114 @@ public abstract class BasicSupport {
paras = newParas;
// Remove double blanks/brks
- boolean space = false;
- boolean brk = true;
- for (int i = 0; i < paras.size(); i++) {
- Paragraph para = paras.get(i);
- boolean thisSpace = para.getType() == ParagraphType.BLANK;
- boolean thisBrk = para.getType() == ParagraphType.BREAK;
-
- if (space && thisBrk) {
- paras.remove(i - 1);
- i--;
- } else if ((space || brk) && (thisSpace || thisBrk)) {
- paras.remove(i);
- i--;
- }
+ fixBlanksBreaks(paras);
+ }
- space = thisSpace;
- brk = thisBrk;
- }
+ return paras;
+ }
- // Remove blank/brk at start
- if (paras.size() > 0
- && (paras.get(0).getType() == ParagraphType.BLANK || paras
- .get(0).getType() == ParagraphType.BREAK)) {
- paras.remove(0);
- }
+ /**
+ * Convert the given line into a single {@link Paragraph}.
+ *
+ * @param source
+ * the source URL of the story
+ * @param line
+ * the textual content of the paragraph
+ *
+ * @return the {@link Paragraph}
+ */
+ private Paragraph makeParagraph(URL source, String line) {
+ URL image = null;
+ if (line.startsWith("[") && line.endsWith("]")) {
+ image = getImageUrl(this, source,
+ line.substring(1, line.length() - 1).trim());
+ }
- // Remove blank/brk at end
- int last = paras.size() - 1;
- if (paras.size() > 0
- && (paras.get(last).getType() == ParagraphType.BLANK || paras
- .get(last).getType() == ParagraphType.BREAK)) {
- paras.remove(last);
+ if (image != null) {
+ return new Paragraph(image);
+ } else {
+ return processPara(line);
+ }
+ }
+
+ /**
+ * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
+ * those {@link Paragraph}s.
+ *
+ * The resulting list will not contain a starting or trailing blank/break
+ * nor 2 blanks or breaks following each other.
+ *
+ * @param paras
+ * the list of {@link Paragraph}s to fix
+ */
+ protected void fixBlanksBreaks(List paras) {
+ boolean space = false;
+ boolean brk = true;
+ for (int i = 0; i < paras.size(); i++) {
+ Paragraph para = paras.get(i);
+ boolean thisSpace = para.getType() == ParagraphType.BLANK;
+ boolean thisBrk = para.getType() == ParagraphType.BREAK;
+
+ if (i > 0 && space && thisBrk) {
+ paras.remove(i - 1);
+ i--;
+ } else if ((space || brk) && (thisSpace || thisBrk)) {
+ paras.remove(i);
+ i--;
}
- chap.setParagraphs(paras);
+ space = thisSpace;
+ brk = thisBrk;
+ }
- return chap;
- } finally {
- in.close();
+ // Remove blank/brk at start
+ if (paras.size() > 0
+ && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
+ 0).getType() == ParagraphType.BREAK)) {
+ paras.remove(0);
+ }
+
+ // Remove blank/brk at end
+ int last = paras.size() - 1;
+ if (paras.size() > 0
+ && (paras.get(last).getType() == ParagraphType.BLANK || paras
+ .get(last).getType() == ParagraphType.BREAK)) {
+ paras.remove(last);
}
}
+ /**
+ * Get the default cover related to this subject (see .info files).
+ *
+ * @param subject
+ * the subject
+ *
+ * @return the cover if any, or NULL
+ */
+ static BufferedImage getDefaultCover(String subject) {
+ if (subject != null && !subject.isEmpty()
+ && Instance.getCoverDir() != null) {
+ try {
+ File fileCover = new File(Instance.getCoverDir(), subject);
+ return getImage(null, fileCover.toURI().toURL(), subject);
+ } catch (MalformedURLException e) {
+ }
+ }
+
+ return null;
+ }
+
/**
* Return the list of supported image extensions.
*
+ * @param emptyAllowed
+ * TRUE to allow an empty extension on first place, which can be
+ * used when you may already have an extension in your input but
+ * are not sure about it
+ *
* @return the extensions
*/
- protected String[] getImageExt(boolean emptyAllowed) {
+ static String[] getImageExt(boolean emptyAllowed) {
if (emptyAllowed) {
return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
} else {
@@ -882,60 +833,99 @@ public abstract class BasicSupport {
* @param line
* the resource to check
*
- * @return the image URL if found, or NULL
+ * @return the image if found, or NULL
*
*/
- protected URL getImage(URL source, String line) {
- String path = new File(source.getFile()).getParent();
- URL url = null;
-
- // try for files
- try {
- String urlBase = new File(new File(path), line.trim()).toURI()
- .toURL().toString();
- for (String ext : getImageExt(true)) {
- if (new File(urlBase + ext).exists()) {
- url = new File(urlBase + ext).toURI().toURL();
+ static BufferedImage getImage(BasicSupport support, URL source, String line) {
+ URL url = getImageUrl(support, source, line);
+ if (url != null) {
+ InputStream in = null;
+ try {
+ in = Instance.getCache().open(url, getSupport(url), true);
+ return IOUtils.toImage(in);
+ } catch (IOException e) {
+ } finally {
+ if (in != null) {
+ try {
+ in.close();
+ } catch (IOException e) {
+ }
}
}
- } catch (Exception e) {
- // Nothing to do here
}
- if (url == null) {
- // try for URLs
- try {
- for (String ext : getImageExt(true)) {
- if (Instance.getCache().check(new URL(line + ext))) {
- url = new URL(line + ext);
+ return null;
+ }
+
+ /**
+ * Check if the given resource can be a local image or a remote image, then
+ * refresh the cache with it if it is.
+ *
+ * @param source
+ * the story source
+ * @param line
+ * the resource to check
+ *
+ * @return the image URL if found, or NULL
+ *
+ */
+ static URL getImageUrl(BasicSupport support, URL source, String line) {
+ URL url = null;
+
+ if (line != null) {
+ // try for files
+ String path = null;
+ if (source != null) {
+ path = new File(source.getFile()).getParent();
+ try {
+ String basePath = new File(new File(path), line.trim())
+ .getAbsolutePath();
+ for (String ext : getImageExt(true)) {
+ if (new File(basePath + ext).exists()) {
+ url = new File(basePath + ext).toURI().toURL();
+ }
}
+ } catch (Exception e) {
+ // Nothing to do here
}
+ }
- // try out of cache
- if (url == null) {
+ if (url == null) {
+ // try for URLs
+ try {
for (String ext : getImageExt(true)) {
- try {
+ if (Instance.getCache().check(new URL(line + ext))) {
url = new URL(line + ext);
- Instance.getCache().refresh(url, this, true);
break;
- } catch (IOException e) {
- // no image with this ext
- url = null;
}
}
+
+ // try out of cache
+ if (url == null) {
+ for (String ext : getImageExt(true)) {
+ try {
+ url = new URL(line + ext);
+ Instance.getCache().refresh(url, support, true);
+ break;
+ } catch (IOException e) {
+ // no image with this ext
+ url = null;
+ }
+ }
+ }
+ } catch (MalformedURLException e) {
+ // Not an url
}
- } catch (MalformedURLException e) {
- // Not an url
}
- }
- // refresh the cached file
- if (url != null) {
- try {
- Instance.getCache().refresh(url, this, true);
- } catch (IOException e) {
- // woops, broken image
- url = null;
+ // refresh the cached file
+ if (url != null) {
+ try {
+ Instance.getCache().refresh(url, support, true);
+ } catch (IOException e) {
+ // woops, broken image
+ url = null;
+ }
}
}
@@ -943,18 +933,45 @@ public abstract class BasicSupport {
}
/**
- * Reset then return {@link BasicSupport#in}.
+ * Open the input file that will be used through the support.
*
- * @return {@link BasicSupport#in}
+ * @param source
+ * the source {@link URL}
+ *
+ * @return the {@link InputStream}
*
* @throws IOException
* in case of I/O error
*/
- protected InputStream getInput() throws IOException {
- in.reset();
+ protected InputStream openInput(URL source) throws IOException {
+ return Instance.getCache().open(source, this, false);
+ }
+
+ /**
+ * Reset the given {@link InputStream} and return it.
+ *
+ * @param in
+ * the {@link InputStream} to reset
+ *
+ * @return the same {@link InputStream} after reset
+ */
+ protected InputStream reset(InputStream in) {
+ try {
+ in.reset();
+ } catch (IOException e) {
+ }
return in;
}
+ /**
+ * Reset then return {@link BasicSupport#in}.
+ *
+ * @return {@link BasicSupport#in}
+ */
+ protected InputStream getInput() {
+ return reset(in);
+ }
+
/**
* Fix the author name if it is prefixed with some "by" {@link String}.
*
@@ -963,7 +980,7 @@ public abstract class BasicSupport {
*
* @return the author without prefixes
*/
- private String fixAuthor(String author) {
+ protected String fixAuthor(String author) {
if (author != null) {
for (String suffix : new String[] { " ", ":" }) {
for (String byString : Instance.getConfig()
@@ -990,34 +1007,71 @@ public abstract class BasicSupport {
* paragraphs (quotes or not)).
*
* @param para
- * the paragraph to requotify (not necessaraly a quote)
+ * the paragraph to requotify (not necessarily a quote)
*
* @return the correctly (or so we hope) quotified paragraphs
*/
- private List requotify(Paragraph para) {
+ protected List requotify(Paragraph para) {
List newParas = new ArrayList();
- if (para.getType() == ParagraphType.QUOTE) {
+ if (para.getType() == ParagraphType.QUOTE
+ && para.getContent().length() > 2) {
String line = para.getContent();
boolean singleQ = line.startsWith("" + openQuote);
boolean doubleQ = line.startsWith("" + openDoubleQuote);
+ // Do not try when more than one quote at a time
+ // (some stories are not easily readable if we do)
+ if (singleQ
+ && line.indexOf(closeQuote, 1) < line
+ .lastIndexOf(closeQuote)) {
+ newParas.add(para);
+ return newParas;
+ }
+ if (doubleQ
+ && line.indexOf(closeDoubleQuote, 1) < line
+ .lastIndexOf(closeDoubleQuote)) {
+ newParas.add(para);
+ return newParas;
+ }
+ //
+
if (!singleQ && !doubleQ) {
line = openDoubleQuote + line + closeDoubleQuote;
- newParas.add(new Paragraph(ParagraphType.QUOTE, line));
+ newParas.add(new Paragraph(ParagraphType.QUOTE, line, para
+ .getWords()));
} else {
+ char open = singleQ ? openQuote : openDoubleQuote;
char close = singleQ ? closeQuote : closeDoubleQuote;
- int posClose = line.indexOf(close);
- int posDot = line.indexOf(".");
- while (posDot >= 0 && posDot < posClose) {
- posDot = line.indexOf(".", posDot + 1);
+
+ int posDot = -1;
+ boolean inQuote = false;
+ int i = 0;
+ for (char car : line.toCharArray()) {
+ if (car == open) {
+ inQuote = true;
+ } else if (car == close) {
+ inQuote = false;
+ } else if (car == '.' && !inQuote) {
+ posDot = i;
+ break;
+ }
+ i++;
}
if (posDot >= 0) {
String rest = line.substring(posDot + 1).trim();
line = line.substring(0, posDot + 1).trim();
- newParas.add(new Paragraph(ParagraphType.QUOTE, line));
- newParas.addAll(requotify(processPara(rest)));
+ long words = 1;
+ for (char car : line.toCharArray()) {
+ if (car == ' ') {
+ words++;
+ }
+ }
+ newParas.add(new Paragraph(ParagraphType.QUOTE, line, words));
+ if (!rest.isEmpty()) {
+ newParas.addAll(requotify(processPara(rest)));
+ }
} else {
newParas.add(para);
}
@@ -1039,7 +1093,7 @@ public abstract class BasicSupport {
*
* @return the processed {@link Paragraph}
*/
- private Paragraph processPara(String line) {
+ protected Paragraph processPara(String line) {
line = ifUnhtml(line).trim();
boolean space = true;
@@ -1048,6 +1102,7 @@ public abstract class BasicSupport {
boolean tentativeCloseQuote = false;
char prev = '\0';
int dashCount = 0;
+ long words = 1;
StringBuilder builder = new StringBuilder();
for (char car : line.toCharArray()) {
@@ -1062,11 +1117,16 @@ public abstract class BasicSupport {
if (tentativeCloseQuote) {
tentativeCloseQuote = false;
- if ((car >= 'a' && car <= 'z') || (car >= 'A' && car <= 'Z')
- || (car >= '0' && car <= '9')) {
+ if (Character.isLetterOrDigit(car)) {
builder.append("'");
} else {
- builder.append(closeQuote);
+ // handle double-single quotes as double quotes
+ if (prev == car) {
+ builder.append(closeDoubleQuote);
+ continue;
+ } else {
+ builder.append(closeQuote);
+ }
}
}
@@ -1076,15 +1136,31 @@ public abstract class BasicSupport {
case '\t':
case '\n': // just in case
case '\r': // just in case
+ if (builder.length() > 0
+ && builder.charAt(builder.length() - 1) != ' ') {
+ words++;
+ }
builder.append(' ');
break;
case '\'':
if (space || (brk && quote)) {
quote = true;
- builder.append(openQuote);
- } else if (prev == ' ') {
- builder.append(openQuote);
+ // handle double-single quotes as double quotes
+ if (prev == car) {
+ builder.deleteCharAt(builder.length() - 1);
+ builder.append(openDoubleQuote);
+ } else {
+ builder.append(openQuote);
+ }
+ } else if (prev == ' ' || prev == car) {
+ // handle double-single quotes as double quotes
+ if (prev == car) {
+ builder.deleteCharAt(builder.length() - 1);
+ builder.append(openDoubleQuote);
+ } else {
+ builder.append(openQuote);
+ }
} else {
// it is a quote ("I'm off") or a 'quote' ("This
// 'good' restaurant"...)
@@ -1137,7 +1213,13 @@ public abstract class BasicSupport {
quote = true;
builder.append(openQuote);
} else {
- builder.append(openQuote);
+ // handle double-single quotes as double quotes
+ if (prev == car) {
+ builder.deleteCharAt(builder.length() - 1);
+ builder.append(openDoubleQuote);
+ } else {
+ builder.append(openQuote);
+ }
}
space = false;
brk = false;
@@ -1150,7 +1232,13 @@ public abstract class BasicSupport {
case 'ã':
space = false;
brk = false;
- builder.append(closeQuote);
+ // handle double-single quotes as double quotes
+ if (prev == car) {
+ builder.deleteCharAt(builder.length() - 1);
+ builder.append(closeDoubleQuote);
+ } else {
+ builder.append(closeQuote);
+ }
break;
case '«':
@@ -1204,11 +1292,11 @@ public abstract class BasicSupport {
type = ParagraphType.QUOTE;
}
- return new Paragraph(type, line);
+ return new Paragraph(type, line, words);
}
/**
- * Remove the HTML from the inpit if {@link BasicSupport#isHtml()} is
+ * Remove the HTML from the input if {@link BasicSupport#isHtml()} is
* true.
*
* @param input
@@ -1248,8 +1336,8 @@ public abstract class BasicSupport {
}
}
- for (SupportType type : new SupportType[] { SupportType.TEXT,
- SupportType.INFO_TEXT }) {
+ for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
+ SupportType.TEXT }) {
BasicSupport support = getSupport(type);
if (support != null && support.supports(url)) {
return support;
@@ -1283,10 +1371,96 @@ public abstract class BasicSupport {
return new MangaFox().setType(type);
case E621:
return new E621().setType(type);
+ case YIFFSTAR:
+ return new YiffStar().setType(type);
case CBZ:
return new Cbz().setType(type);
+ case HTML:
+ return new Html().setType(type);
}
return null;
}
+
+	/**
+	 * Search the given input for the first line matching the needle and return
+	 * the line found at the requested offset from it.
+	 *
+	 * @param in
+	 *            the input to scan
+	 * @param needle
+	 *            the text the target line must contain; a leading "^" means
+	 *            the target line must start with the rest of the needle
+	 * @param relativeLine
+	 *            the offset of the line to return, relative to the target line
+	 *            (-1 = the line before, 0 = the target line...)
+	 *
+	 * @return the selected line, or null when nothing was selected
+	 */
+	static String getLine(InputStream in, String needle, int relativeLine) {
+		final boolean firstOnly = true;
+		return getLine(in, needle, relativeLine, firstOnly);
+	}
+
+	/**
+	 * Return a line from the given input, selected relative to the first line
+	 * that matches the needle (the input is reset, then read as UTF-8 lines).
+	 *
+	 * @param in
+	 *            the input (not closed by this method)
+	 * @param needle
+	 *            a string that must be found inside the target line (also
+	 *            supports "^" at start to say "only if it starts with" the
+	 *            needle)
+	 * @param relativeLine
+	 *            the line to return based upon the target line position (-1 =
+	 *            the line before, 0 = the target line...)
+	 * @param first
+	 *            true to stop reading as soon as a line is selected, false to
+	 *            always spend the input; NOTE(review): only the first matching
+	 *            line is ever used as the target, so false does not select the
+	 *            last match -- confirm the intent
+	 *
+	 * @return the line, or null if there is no target line or if the requested
+	 *         line lies outside of the input
+	 */
+	static String getLine(InputStream in, String needle, int relativeLine,
+			boolean first) {
+		try {
+			in.reset();
+		} catch (IOException e) {
+			Instance.syserr(e);
+		}
+
+		boolean atStart = needle.startsWith("^");
+		String text = atStart ? needle.substring(1) : needle;
+
+		String rep = null;
+		List<String> lines = new ArrayList<String>();
+		// not closed on purpose: closing the Scanner would also close "in"
+		@SuppressWarnings("resource")
+		Scanner scan = new Scanner(in, "UTF-8");
+		scan.useDelimiter("\\n");
+		int index = -1;
+		while (scan.hasNext()) {
+			String line = scan.next();
+			lines.add(line);
+
+			if (index == -1
+					&& (atStart ? line.startsWith(text) : line.contains(text))) {
+				index = lines.size() - 1;
+			}
+
+			// the wanted line may lie before the start of the input (negative)
+			// or may not be read yet: in both cases there is nothing to select
+			int target = index + relativeLine;
+			if (index >= 0 && target >= 0 && target < lines.size()) {
+				rep = lines.get(target);
+				if (first) {
+					break;
+				}
+			}
+		}
+		return rep;
+	}
}