import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
+import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
MANGAFOX,
/** Furry website with comics support */
E621,
+ /** Furry website with stories */
+ YIFFSTAR,
/** CBZ files */
CBZ,
/** HTML files */
protected abstract String getChapterContent(URL source, InputStream in,
int number) throws IOException;
+ /**
+ * Log into the support (can be a no-op depending upon the support).
+ * <p>
+ * The default implementation does nothing; a support that needs to
+ * authenticate can override it.
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ public void login() throws IOException {
+ // nothing to do by default
+ }
+
/**
* Return the list of cookies (values included) that must be used to
* correctly fetch the resources.
* it.
*
* @return the cookies
+ *
+ * @throws IOException
+ * in case of I/O error (the default implementation only creates
+ * an empty map and never throws)
*/
- public Map<String, String> getCookies() {
+ public Map<String, String> getCookies() throws IOException {
return new HashMap<String, String>();
}
+ /**
+ * Return the canonical form of the main {@link URL}.
+ * <p>
+ * The default implementation returns the given {@link URL} unchanged.
+ *
+ * @param source
+ * the source {@link URL}
+ *
+ * @return the canonical form of this {@link URL}
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ public URL getCanonicalUrl(URL source) throws IOException {
+ return source;
+ }
+
/**
* Process the given story resource into a partially filled {@link Story}
* object containing the name and metadata, except for the description.
*/
protected Story processMeta(URL url, boolean close, boolean getDesc)
throws IOException {
+ login();
+
+ url = getCanonicalUrl(url);
+
+ setCurrentReferer(url);
+
in = openInput(url);
if (in == null) {
return null;
Story story = new Story();
MetaData meta = getMeta(url, getInput());
+ // The existing cover check just below treats a null meta as possible,
+ // so guard here too before dereferencing it.
+ if (meta != null
+ && (meta.getCreationDate() == null || meta.getCreationDate()
+ .isEmpty())) {
+ // No creation date supplied by the support: default to "now"
+ meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
+ }
story.setMeta(meta);
if (meta != null && meta.getCover() == null) {
in.close();
}
}
+
+ setCurrentReferer(null);
}
}
pg.setMinMax(0, 100);
}
- setCurrentReferer(url);
-
+ url = getCanonicalUrl(url);
pg.setProgress(1);
try {
Story story = processMeta(url, false, true);
return null;
}
+ // The meta is null-checked further down when the word count is stored;
+ // apply the same guard here so a story without meta does not throw.
+ if (story.getMeta() != null) {
+ pg.setName("Retrieving " + story.getMeta().getTitle());
+ }
+
+ setCurrentReferer(url);
+
story.setChapters(new ArrayList<Chapter>());
List<Entry<String, URL>> chapters = getChapters(url, getInput());
Progress pgChaps = new Progress(0, chapters.size());
pg.addProgress(pgChaps, 80);
+ long words = 0;
for (Entry<String, URL> chap : chapters) {
setCurrentReferer(chap.getValue());
InputStream chapIn = Instance.getCache().open(
chap.getValue(), this, true);
try {
- story.getChapters().add(
- makeChapter(url, i, chap.getKey(),
- getChapterContent(url, chapIn, i)));
+ Chapter cc = makeChapter(url, i, chap.getKey(),
+ getChapterContent(url, chapIn, i));
+ words += cc.getWords();
+ story.getChapters().add(cc);
+ if (story.getMeta() != null) {
+ story.getMeta().setWords(words);
+ }
} finally {
chapIn.close();
}
in.close();
}
- currentReferer = null;
+ setCurrentReferer(null);
}
}
/**
- * The support type.$
+ * The support type.
*
* @return the type
*/
Chapter chap = new Chapter(number, chapterName);
if (content != null) {
- chap.setParagraphs(makeParagraphs(source, content));
+ List<Paragraph> paras = makeParagraphs(source, content);
+ long words = 0;
+ for (Paragraph para : paras) {
+ words += para.getWords();
+ }
+ chap.setParagraphs(paras);
+ chap.setWords(words);
}
return chap;
/**
* Return the list of supported image extensions.
*
+ * @param emptyAllowed
+ * TRUE to allow an empty extension on first place, which can be
+ * used when you may already have an extension in your input but
+ * are not sure about it
+ *
* @return the extensions
*/
static String[] getImageExt(boolean emptyAllowed) {
}
}
+ /**
+ * Check if the given resource can be a local image or a remote image, then
+ * refresh the cache with it if it is.
+ *
+ * @param support
+ * the support forwarded to the image URL lookup
+ * @param source
+ * the story source
+ * @param line
+ * the resource to check
+ *
+ * @return the image if found, or NULL
+ *
+ */
static BufferedImage getImage(BasicSupport support, URL source, String line) {
URL url = getImageUrl(support, source, line);
if (url != null) {
return Instance.getCache().open(source, this, false);
}
+ /**
+ * Reset the given {@link InputStream} and return it.
+ *
+ * @param in
+ * the {@link InputStream} to reset
+ *
+ * @return the same {@link InputStream} after reset
+ */
protected InputStream reset(InputStream in) {
try {
in.reset();
* paragraphs (quotes or not)).
*
* @param para
- * the paragraph to requotify (not necessaraly a quote)
+ * the paragraph to requotify (not necessarily a quote)
*
* @return the correctly (or so we hope) quotified paragraphs
*/
if (!singleQ && !doubleQ) {
line = openDoubleQuote + line + closeDoubleQuote;
- newParas.add(new Paragraph(ParagraphType.QUOTE, line));
+ newParas.add(new Paragraph(ParagraphType.QUOTE, line, para
+ .getWords()));
} else {
char open = singleQ ? openQuote : openDoubleQuote;
char close = singleQ ? closeQuote : closeDoubleQuote;
if (posDot >= 0) {
String rest = line.substring(posDot + 1).trim();
line = line.substring(0, posDot + 1).trim();
- newParas.add(new Paragraph(ParagraphType.QUOTE, line));
+ long words = 1;
+ for (char car : line.toCharArray()) {
+ if (car == ' ') {
+ words++;
+ }
+ }
+ newParas.add(new Paragraph(ParagraphType.QUOTE, line, words));
if (!rest.isEmpty()) {
newParas.addAll(requotify(processPara(rest)));
}
boolean tentativeCloseQuote = false;
char prev = '\0';
int dashCount = 0;
+ long words = 1;
StringBuilder builder = new StringBuilder();
for (char car : line.toCharArray()) {
case '\t':
case '\n': // just in case
case '\r': // just in case
+ if (builder.length() > 0
+ && builder.charAt(builder.length() - 1) != ' ') {
+ words++;
+ }
builder.append(' ');
break;
type = ParagraphType.QUOTE;
}
- return new Paragraph(type, line);
+ return new Paragraph(type, line, words);
}
/**
- * Remove the HTML from the inpit <b>if</b> {@link BasicSupport#isHtml()} is
+ * Remove the HTML from the input <b>if</b> {@link BasicSupport#isHtml()} is
* true.
*
* @param input
return new MangaFox().setType(type);
case E621:
return new E621().setType(type);
+ case YIFFSTAR:
+ return new YiffStar().setType(type);
case CBZ:
return new Cbz().setType(type);
case HTML: