package be.nikiroo.fanfix.supported;
import java.awt.image.BufferedImage;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Scanner;
import be.nikiroo.fanfix.Instance;
import be.nikiroo.fanfix.bundles.Config;
import be.nikiroo.fanfix.bundles.StringId;
import be.nikiroo.fanfix.data.Chapter;
import be.nikiroo.fanfix.data.MetaData;
import be.nikiroo.fanfix.data.Paragraph;
import be.nikiroo.fanfix.data.Paragraph.ParagraphType;
import be.nikiroo.fanfix.data.Story;
import be.nikiroo.utils.IOUtils;
import be.nikiroo.utils.Progress;
import be.nikiroo.utils.StringUtils;
/**
* This class is the base class used by the other support classes. It can be
* used outside of this package, and have static method that you can use to get
* access to the correct support class.
*
* It will be used with 'resources' (usually web pages or files).
*
* @author niki
*/
public abstract class BasicSupport {
/**
* The supported input types for which we can get a {@link BasicSupport}
* object.
*
* @author niki
*/
public enum SupportType {
/** EPUB files created with this program */
EPUB,
/** Pure text file with some rules */
TEXT,
/** TEXT but with associated .info file */
INFO_TEXT,
/** My Little Pony fanfictions */
FIMFICTION,
/** Fanfictions from a lot of different universes */
FANFICTION,
/** Website with lots of Mangas */
MANGAFOX,
/** Furry website with comics support */
E621,
/** Furry website with stories */
YIFFSTAR,
/** Comics and images groups, mostly but not only NSFW */
E_HENTAI,
/** CBZ files */
CBZ,
/** HTML files */
HTML;
/**
* A description of this support type (more information than the
* {@link BasicSupport#getSourceName()}).
*
* @return the description
*/
public String getDesc() {
String desc = Instance.getTrans().getStringX(StringId.INPUT_DESC,
this.name());
if (desc == null) {
desc = Instance.getTrans().getString(StringId.INPUT_DESC, this);
}
return desc;
}
/**
* The name of this support type (a short version).
*
* @return the name
*/
public String getSourceName() {
BasicSupport support = BasicSupport.getSupport(this);
if (support != null) {
return support.getSourceName();
}
return null;
}
@Override
public String toString() {
return super.toString().toLowerCase();
}
/**
* Call {@link SupportType#valueOf(String)} after conversion to upper
* case.
*
* @param typeName
* the possible type name
*
* @return NULL or the type
*/
public static SupportType valueOfUC(String typeName) {
return SupportType.valueOf(typeName == null ? null : typeName
.toUpperCase());
}
/**
* Call {@link SupportType#valueOf(String)} after conversion to upper
* case but return NULL for NULL instead of raising exception.
*
* @param typeName
* the possible type name
*
* @return NULL or the type
*/
public static SupportType valueOfNullOkUC(String typeName) {
if (typeName == null) {
return null;
}
return SupportType.valueOfUC(typeName);
}
/**
* Call {@link SupportType#valueOf(String)} after conversion to upper
* case but return NULL in case of error instead of raising an
* exception.
*
* @param typeName
* the possible type name
*
* @return NULL or the type
*/
public static SupportType valueOfAllOkUC(String typeName) {
try {
return SupportType.valueOfUC(typeName);
} catch (Exception e) {
return null;
}
}
}
private InputStream in;
private SupportType type;
private URL currentReferer; // with only one 'r', as in 'HTTP'...
// quote chars
private char openQuote = Instance.getTrans().getCharacter(
StringId.OPEN_SINGLE_QUOTE);
private char closeQuote = Instance.getTrans().getCharacter(
StringId.CLOSE_SINGLE_QUOTE);
private char openDoubleQuote = Instance.getTrans().getCharacter(
StringId.OPEN_DOUBLE_QUOTE);
private char closeDoubleQuote = Instance.getTrans().getCharacter(
StringId.CLOSE_DOUBLE_QUOTE);
/**
* The name of this support class.
*
* @return the name
*/
protected abstract String getSourceName();
/**
* Check if the given resource is supported by this {@link BasicSupport}.
*
* @param url
* the resource to check for
*
* @return TRUE if it is
*/
protected abstract boolean supports(URL url);
/**
* Return TRUE if the support will return HTML encoded content values for
* the chapters content.
*
* @return TRUE for HTML
*/
protected abstract boolean isHtml();
/**
* Return the {@link MetaData} of this story.
*
* @param source
* the source of the story
* @param in
* the input (the main resource)
*
* @return the associated {@link MetaData}
*
* @throws IOException
* in case of I/O error
*/
protected abstract MetaData getMeta(URL source, InputStream in)
throws IOException;
/**
* Return the story description.
*
* @param source
* the source of the story
* @param in
* the input (the main resource)
*
* @return the description
*
* @throws IOException
* in case of I/O error
*/
protected abstract String getDesc(URL source, InputStream in)
throws IOException;
/**
* Return the list of chapters (name and resource).
*
* @param source
* the source of the story
* @param in
* the input (the main resource)
* @param pg
* the optional progress reporter
*
* @return the chapters
*
* @throws IOException
* in case of I/O error
*/
protected abstract List> getChapters(URL source,
InputStream in, Progress pg) throws IOException;
/**
* Return the content of the chapter (possibly HTML encoded, if
* {@link BasicSupport#isHtml()} is TRUE).
*
* @param source
* the source of the story
* @param in
* the input (the main resource)
* @param number
* the chapter number
* @param pg
* the optional progress reporter
*
* @return the content
*
* @throws IOException
* in case of I/O error
*/
protected abstract String getChapterContent(URL source, InputStream in,
int number, Progress pg) throws IOException;
/**
* Log into the support (can be a no-op depending upon the support).
*
* @throws IOException
* in case of I/O error
*/
public void login() throws IOException {
}
/**
* Return the list of cookies (values included) that must be used to
* correctly fetch the resources.
*
* You are expected to call the super method implementation if you override
* it.
*
* @return the cookies
*
* @throws IOException
* in case of I/O error
*/
public Map getCookies() throws IOException {
return new HashMap();
}
/**
* Return the canonical form of the main {@link URL}.
*
* @param source
* the source {@link URL}
*
* @return the canonical form of this {@link URL}
*
* @throws IOException
* in case of I/O error
*/
public URL getCanonicalUrl(URL source) throws IOException {
return source;
}
/**
* Process the given story resource into a partially filled {@link Story}
* object containing the name and metadata, except for the description.
*
* @param url
* the story resource
*
* @return the {@link Story}
*
* @throws IOException
* in case of I/O error
*/
public Story processMeta(URL url) throws IOException {
return processMeta(url, true, false, null);
}
/**
* Process the given story resource into a partially filled {@link Story}
* object containing the name and metadata.
*
* @param url
* the story resource
* @param close
* close "this" and "in" when done
* @param getDesc
* retrieve the description of the story, or not
* @param pg
* the optional progress reporter
*
* @return the {@link Story}
*
* @throws IOException
* in case of I/O error
*/
protected Story processMeta(URL url, boolean close, boolean getDesc,
Progress pg) throws IOException {
if (pg == null) {
pg = new Progress();
} else {
pg.setMinMax(0, 100);
}
login();
pg.setProgress(10);
url = getCanonicalUrl(url);
setCurrentReferer(url);
in = openInput(url);
if (in == null) {
return null;
}
try {
preprocess(url, getInput());
pg.setProgress(30);
Story story = new Story();
MetaData meta = getMeta(url, getInput());
if (meta.getCreationDate() == null
|| meta.getCreationDate().isEmpty()) {
meta.setCreationDate(StringUtils.fromTime(new Date().getTime()));
}
story.setMeta(meta);
pg.setProgress(50);
if (meta != null && meta.getCover() == null) {
meta.setCover(getDefaultCover(meta.getSubject()));
}
pg.setProgress(60);
if (getDesc) {
String descChapterName = Instance.getTrans().getString(
StringId.DESCRIPTION);
story.getMeta().setResume(
makeChapter(url, 0, descChapterName,
getDesc(url, getInput()), null));
}
pg.setProgress(100);
return story;
} finally {
if (close) {
try {
close();
} catch (IOException e) {
Instance.syserr(e);
}
if (in != null) {
in.close();
}
}
setCurrentReferer(null);
}
}
/**
* Process the given story resource into a fully filled {@link Story}
* object.
*
* @param url
* the story resource
* @param pg
* the optional progress reporter
*
* @return the {@link Story}
*
* @throws IOException
* in case of I/O error
*/
public Story process(URL url, Progress pg) throws IOException {
if (pg == null) {
pg = new Progress();
} else {
pg.setMinMax(0, 100);
}
url = getCanonicalUrl(url);
pg.setProgress(1);
try {
Progress pgMeta = new Progress();
pg.addProgress(pgMeta, 10);
Story story = processMeta(url, false, true, pgMeta);
if (!pgMeta.isDone()) {
pgMeta.setProgress(pgMeta.getMax()); // 10%
}
if (story == null) {
pg.setProgress(90);
return null;
}
pg.setName("Retrieving " + story.getMeta().getTitle());
setCurrentReferer(url);
Progress pgGetChapters = new Progress();
pg.addProgress(pgGetChapters, 10);
story.setChapters(new ArrayList());
List> chapters = getChapters(url, getInput(),
pgGetChapters);
if (!pgGetChapters.isDone()) {
pgGetChapters.setProgress(pgGetChapters.getMax()); // 20%
}
if (chapters != null) {
Progress pgChaps = new Progress("Extracting chapters", 0,
chapters.size() * 300);
pg.addProgress(pgChaps, 80);
long words = 0;
int i = 1;
for (Entry chap : chapters) {
pgChaps.setName("Extracting chapter " + i);
setCurrentReferer(chap.getValue());
InputStream chapIn = Instance.getCache().open(
chap.getValue(), this, true);
pgChaps.setProgress(i * 100);
try {
Progress pgGetChapterContent = new Progress();
Progress pgMakeChapter = new Progress();
pgChaps.addProgress(pgGetChapterContent, 100);
pgChaps.addProgress(pgMakeChapter, 100);
String content = getChapterContent(url, chapIn, i,
pgGetChapterContent);
if (!pgGetChapterContent.isDone()) {
pgGetChapterContent.setProgress(pgGetChapterContent
.getMax());
}
Chapter cc = makeChapter(url, i, chap.getKey(),
content, pgMakeChapter);
if (!pgMakeChapter.isDone()) {
pgMakeChapter.setProgress(pgMakeChapter.getMax());
}
words += cc.getWords();
story.getChapters().add(cc);
if (story.getMeta() != null) {
story.getMeta().setWords(words);
}
} finally {
chapIn.close();
}
i++;
}
pgChaps.setName("Extracting chapters");
} else {
pg.setProgress(80);
}
return story;
} finally {
try {
close();
} catch (IOException e) {
Instance.syserr(e);
}
if (in != null) {
in.close();
}
setCurrentReferer(null);
}
}
/**
* The support type.
*
* @return the type
*/
public SupportType getType() {
return type;
}
/**
* The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
* the current {@link URL} we work on.
*
* @return the referer
*/
public URL getCurrentReferer() {
return currentReferer;
}
/**
* The current referer {@link URL} (only one 'r', as in 'HTML'...), i.e.,
* the current {@link URL} we work on.
*
* @param currentReferer
* the new referer
*/
protected void setCurrentReferer(URL currentReferer) {
this.currentReferer = currentReferer;
}
/**
* The support type.
*
* @param type
* the new type
*
* @return this
*/
protected BasicSupport setType(SupportType type) {
this.type = type;
return this;
}
/**
* Prepare the support if needed before processing.
*
* @param source
* the source of the story
* @param in
* the input (the main resource)
*
* @throws IOException
* on I/O error
*/
protected void preprocess(URL source, InputStream in) throws IOException {
}
/**
* Now that we have processed the {@link Story}, close the resources if any.
*
* @throws IOException
* on I/O error
*/
protected void close() throws IOException {
}
/**
* Create a {@link Chapter} object from the given information, formatting
* the content as it should be.
*
* @param source
* the source of the story
* @param number
* the chapter number
* @param name
* the chapter name
* @param content
* the chapter content
* @param pg
* the optional progress reporter
*
* @return the {@link Chapter}
*
* @throws IOException
* in case of I/O error
*/
protected Chapter makeChapter(URL source, int number, String name,
String content, Progress pg) throws IOException {
// Chapter name: process it correctly, then remove the possible
// redundant "Chapter x: " in front of it, or "-" (as in
// "Chapter 5: - Fun!" after the ": " was automatically added)
String chapterName = processPara(name).getContent().trim();
for (String lang : Instance.getConfig().getString(Config.CHAPTER)
.split(",")) {
String chapterWord = Instance.getConfig().getStringX(
Config.CHAPTER, lang);
if (chapterName.startsWith(chapterWord)) {
chapterName = chapterName.substring(chapterWord.length())
.trim();
break;
}
}
if (chapterName.startsWith(Integer.toString(number))) {
chapterName = chapterName.substring(
Integer.toString(number).length()).trim();
}
while (chapterName.startsWith(":") || chapterName.startsWith("-")) {
chapterName = chapterName.substring(1).trim();
}
//
Chapter chap = new Chapter(number, chapterName);
if (content != null) {
List paras = makeParagraphs(source, content, pg);
long words = 0;
for (Paragraph para : paras) {
words += para.getWords();
}
chap.setParagraphs(paras);
chap.setWords(words);
}
return chap;
}
/**
* Convert the given content into {@link Paragraph}s.
*
* @param source
* the source URL of the story
* @param content
* the textual content
* @param pg
* the optional progress reporter
*
* @return the {@link Paragraph}s
*
* @throws IOException
* in case of I/O error
*/
protected List makeParagraphs(URL source, String content,
Progress pg) throws IOException {
if (pg == null) {
pg = new Progress();
}
if (isHtml()) {
// Special
processing:
content = content.replaceAll("(
]*>)|(
)|(
)",
"
* * *
");
}
List paras = new ArrayList();
if (content != null && !content.trim().isEmpty()) {
if (isHtml()) {
String[] tab = content.split("(|
|
|
)");
pg.setMinMax(0, tab.length);
int i = 1;
for (String line : tab) {
if (line.startsWith("[") && line.endsWith("]")) {
pg.setName("Extracting image " + i);
}
paras.add(makeParagraph(source, line.trim()));
pg.setProgress(i++);
}
pg.setName(null);
} else {
List lines = new ArrayList();
BufferedReader buff = null;
try {
buff = new BufferedReader(
new InputStreamReader(new ByteArrayInputStream(
content.getBytes("UTF-8")), "UTF-8"));
for (String line = buff.readLine(); line != null; line = buff
.readLine()) {
lines.add(line.trim());
}
} finally {
if (buff != null) {
buff.close();
}
}
pg.setMinMax(0, lines.size());
int i = 0;
for (String line : lines) {
if (line.startsWith("[") && line.endsWith("]")) {
pg.setName("Extracting image " + i);
}
paras.add(makeParagraph(source, line));
pg.setProgress(i++);
}
pg.setName(null);
}
// Check quotes for "bad" format
List newParas = new ArrayList();
for (Paragraph para : paras) {
newParas.addAll(requotify(para));
}
paras = newParas;
// Remove double blanks/brks
fixBlanksBreaks(paras);
}
return paras;
}
/**
* Convert the given line into a single {@link Paragraph}.
*
* @param source
* the source URL of the story
* @param line
* the textual content of the paragraph
*
* @return the {@link Paragraph}
*/
private Paragraph makeParagraph(URL source, String line) {
URL image = null;
if (line.startsWith("[") && line.endsWith("]")) {
image = getImageUrl(this, source,
line.substring(1, line.length() - 1).trim());
}
if (image != null) {
return new Paragraph(image);
} else {
return processPara(line);
}
}
/**
* Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
* those {@link Paragraph}s.
*
* The resulting list will not contain a starting or trailing blank/break
* nor 2 blanks or breaks following each other.
*
* @param paras
* the list of {@link Paragraph}s to fix
*/
protected void fixBlanksBreaks(List paras) {
boolean space = false;
boolean brk = true;
for (int i = 0; i < paras.size(); i++) {
Paragraph para = paras.get(i);
boolean thisSpace = para.getType() == ParagraphType.BLANK;
boolean thisBrk = para.getType() == ParagraphType.BREAK;
if (i > 0 && space && thisBrk) {
paras.remove(i - 1);
i--;
} else if ((space || brk) && (thisSpace || thisBrk)) {
paras.remove(i);
i--;
}
space = thisSpace;
brk = thisBrk;
}
// Remove blank/brk at start
if (paras.size() > 0
&& (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
0).getType() == ParagraphType.BREAK)) {
paras.remove(0);
}
// Remove blank/brk at end
int last = paras.size() - 1;
if (paras.size() > 0
&& (paras.get(last).getType() == ParagraphType.BLANK || paras
.get(last).getType() == ParagraphType.BREAK)) {
paras.remove(last);
}
}
/**
* Get the default cover related to this subject (see .info files).
*
* @param subject
* the subject
*
* @return the cover if any, or NULL
*/
static BufferedImage getDefaultCover(String subject) {
if (subject != null && !subject.isEmpty()
&& Instance.getCoverDir() != null) {
try {
File fileCover = new File(Instance.getCoverDir(), subject);
return getImage(null, fileCover.toURI().toURL(), subject);
} catch (MalformedURLException e) {
}
}
return null;
}
/**
* Return the list of supported image extensions.
*
* @param emptyAllowed
* TRUE to allow an empty extension on first place, which can be
* used when you may already have an extension in your input but
* are not sure about it
*
* @return the extensions
*/
static String[] getImageExt(boolean emptyAllowed) {
if (emptyAllowed) {
return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
} else {
return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" };
}
}
/**
* Check if the given resource can be a local image or a remote image, then
* refresh the cache with it if it is.
*
* @param source
* the story source
* @param line
* the resource to check
*
* @return the image if found, or NULL
*
*/
static BufferedImage getImage(BasicSupport support, URL source, String line) {
URL url = getImageUrl(support, source, line);
if (url != null) {
InputStream in = null;
try {
in = Instance.getCache().open(url, getSupport(url), true);
return IOUtils.toImage(in);
} catch (IOException e) {
} finally {
if (in != null) {
try {
in.close();
} catch (IOException e) {
}
}
}
}
return null;
}
/**
* Check if the given resource can be a local image or a remote image, then
* refresh the cache with it if it is.
*
* @param source
* the story source
* @param line
* the resource to check
*
* @return the image URL if found, or NULL
*
*/
static URL getImageUrl(BasicSupport support, URL source, String line) {
URL url = null;
if (line != null) {
// try for files
if (source != null) {
try {
String relPath = null;
String absPath = null;
try {
String path = new File(source.getFile()).getParent();
relPath = new File(new File(path), line.trim())
.getAbsolutePath();
} catch (Exception e) {
// Cannot be converted to path (one possibility to take
// into account: absolute path on Windows)
}
try {
absPath = new File(line.trim()).getAbsolutePath();
} catch (Exception e) {
// Cannot be converted to path (at all)
}
for (String ext : getImageExt(true)) {
if (absPath != null && new File(absPath + ext).exists()) {
url = new File(absPath + ext).toURI().toURL();
} else if (relPath != null
&& new File(relPath + ext).exists()) {
url = new File(relPath + ext).toURI().toURL();
}
}
} catch (Exception e) {
// Should not happen since we control the correct arguments
}
}
if (url == null) {
// try for URLs
try {
for (String ext : getImageExt(true)) {
if (Instance.getCache().check(new URL(line + ext))) {
url = new URL(line + ext);
break;
}
}
// try out of cache
if (url == null) {
for (String ext : getImageExt(true)) {
try {
url = new URL(line + ext);
Instance.getCache().refresh(url, support, true);
break;
} catch (IOException e) {
// no image with this ext
url = null;
}
}
}
} catch (MalformedURLException e) {
// Not an url
}
}
// refresh the cached file
if (url != null) {
try {
Instance.getCache().refresh(url, support, true);
} catch (IOException e) {
// woops, broken image
url = null;
}
}
}
return url;
}
/**
* Open the input file that will be used through the support.
*
* @param source
* the source {@link URL}
*
* @return the {@link InputStream}
*
* @throws IOException
* in case of I/O error
*/
protected InputStream openInput(URL source) throws IOException {
return Instance.getCache().open(source, this, false);
}
/**
* Reset the given {@link InputStream} and return it.
*
* @param in
* the {@link InputStream} to reset
*
* @return the same {@link InputStream} after reset
*/
protected InputStream reset(InputStream in) {
try {
in.reset();
} catch (IOException e) {
}
return in;
}
/**
* Reset then return {@link BasicSupport#in}.
*
* @return {@link BasicSupport#in}
*/
protected InputStream getInput() {
return reset(in);
}
/**
* Fix the author name if it is prefixed with some "by" {@link String}.
*
* @param author
* the author with a possible prefix
*
* @return the author without prefixes
*/
protected String fixAuthor(String author) {
if (author != null) {
for (String suffix : new String[] { " ", ":" }) {
for (String byString : Instance.getConfig()
.getString(Config.BYS).split(",")) {
byString += suffix;
if (author.toUpperCase().startsWith(byString.toUpperCase())) {
author = author.substring(byString.length()).trim();
}
}
}
// Special case (without suffix):
if (author.startsWith("©")) {
author = author.substring(1);
}
}
return author;
}
/**
* Check quotes for bad format (i.e., quotes with normal paragraphs inside)
* and requotify them (i.e., separate them into QUOTE paragraphs and other
* paragraphs (quotes or not)).
*
* @param para
* the paragraph to requotify (not necessarily a quote)
*
* @return the correctly (or so we hope) quotified paragraphs
*/
protected List requotify(Paragraph para) {
List newParas = new ArrayList();
if (para.getType() == ParagraphType.QUOTE
&& para.getContent().length() > 2) {
String line = para.getContent();
boolean singleQ = line.startsWith("" + openQuote);
boolean doubleQ = line.startsWith("" + openDoubleQuote);
// Do not try when more than one quote at a time
// (some stories are not easily readable if we do)
if (singleQ
&& line.indexOf(closeQuote, 1) < line
.lastIndexOf(closeQuote)) {
newParas.add(para);
return newParas;
}
if (doubleQ
&& line.indexOf(closeDoubleQuote, 1) < line
.lastIndexOf(closeDoubleQuote)) {
newParas.add(para);
return newParas;
}
//
if (!singleQ && !doubleQ) {
line = openDoubleQuote + line + closeDoubleQuote;
newParas.add(new Paragraph(ParagraphType.QUOTE, line, para
.getWords()));
} else {
char open = singleQ ? openQuote : openDoubleQuote;
char close = singleQ ? closeQuote : closeDoubleQuote;
int posDot = -1;
boolean inQuote = false;
int i = 0;
for (char car : line.toCharArray()) {
if (car == open) {
inQuote = true;
} else if (car == close) {
inQuote = false;
} else if (car == '.' && !inQuote) {
posDot = i;
break;
}
i++;
}
if (posDot >= 0) {
String rest = line.substring(posDot + 1).trim();
line = line.substring(0, posDot + 1).trim();
long words = 1;
for (char car : line.toCharArray()) {
if (car == ' ') {
words++;
}
}
newParas.add(new Paragraph(ParagraphType.QUOTE, line, words));
if (!rest.isEmpty()) {
newParas.addAll(requotify(processPara(rest)));
}
} else {
newParas.add(para);
}
}
} else {
newParas.add(para);
}
return newParas;
}
/**
* Process a {@link Paragraph} from a raw line of text.
*
* Will also fix quotes and HTML encoding if needed.
*
* @param line
* the raw line
*
* @return the processed {@link Paragraph}
*/
protected Paragraph processPara(String line) {
line = ifUnhtml(line).trim();
boolean space = true;
boolean brk = true;
boolean quote = false;
boolean tentativeCloseQuote = false;
char prev = '\0';
int dashCount = 0;
long words = 1;
StringBuilder builder = new StringBuilder();
for (char car : line.toCharArray()) {
if (car != '-') {
if (dashCount > 0) {
// dash, ndash and mdash: - – —
// currently: always use mdash
builder.append(dashCount == 1 ? '-' : '—');
}
dashCount = 0;
}
if (tentativeCloseQuote) {
tentativeCloseQuote = false;
if (Character.isLetterOrDigit(car)) {
builder.append("'");
} else {
// handle double-single quotes as double quotes
if (prev == car) {
builder.append(closeDoubleQuote);
continue;
} else {
builder.append(closeQuote);
}
}
}
switch (car) {
case ' ': // note: unbreakable space
case ' ':
case '\t':
case '\n': // just in case
case '\r': // just in case
if (builder.length() > 0
&& builder.charAt(builder.length() - 1) != ' ') {
words++;
}
builder.append(' ');
break;
case '\'':
if (space || (brk && quote)) {
quote = true;
// handle double-single quotes as double quotes
if (prev == car) {
builder.deleteCharAt(builder.length() - 1);
builder.append(openDoubleQuote);
} else {
builder.append(openQuote);
}
} else if (prev == ' ' || prev == car) {
// handle double-single quotes as double quotes
if (prev == car) {
builder.deleteCharAt(builder.length() - 1);
builder.append(openDoubleQuote);
} else {
builder.append(openQuote);
}
} else {
// it is a quote ("I'm off") or a 'quote' ("This
// 'good' restaurant"...)
tentativeCloseQuote = true;
}
break;
case '"':
if (space || (brk && quote)) {
quote = true;
builder.append(openDoubleQuote);
} else if (prev == ' ') {
builder.append(openDoubleQuote);
} else {
builder.append(closeDoubleQuote);
}
break;
case '-':
if (space) {
quote = true;
} else {
dashCount++;
}
space = false;
break;
case '*':
case '~':
case '/':
case '\\':
case '<':
case '>':
case '=':
case '+':
case '_':
case '–':
case '—':
space = false;
builder.append(car);
break;
case '‘':
case '`':
case '‹':
case '﹁':
case '〈':
case '「':
if (space || (brk && quote)) {
quote = true;
builder.append(openQuote);
} else {
// handle double-single quotes as double quotes
if (prev == car) {
builder.deleteCharAt(builder.length() - 1);
builder.append(openDoubleQuote);
} else {
builder.append(openQuote);
}
}
space = false;
brk = false;
break;
case '’':
case '›':
case '﹂':
case '〉':
case '」':
space = false;
brk = false;
// handle double-single quotes as double quotes
if (prev == car) {
builder.deleteCharAt(builder.length() - 1);
builder.append(closeDoubleQuote);
} else {
builder.append(closeQuote);
}
break;
case '«':
case '“':
case '﹃':
case '《':
case '『':
if (space || (brk && quote)) {
quote = true;
builder.append(openDoubleQuote);
} else {
builder.append(openDoubleQuote);
}
space = false;
brk = false;
break;
case '»':
case '”':
case '﹄':
case '》':
case '』':
space = false;
brk = false;
builder.append(closeDoubleQuote);
break;
default:
space = false;
brk = false;
builder.append(car);
break;
}
prev = car;
}
if (tentativeCloseQuote) {
tentativeCloseQuote = false;
builder.append(closeQuote);
}
line = builder.toString().trim();
ParagraphType type = ParagraphType.NORMAL;
if (space) {
type = ParagraphType.BLANK;
} else if (brk) {
type = ParagraphType.BREAK;
} else if (quote) {
type = ParagraphType.QUOTE;
}
return new Paragraph(type, line, words);
}
/**
* Remove the HTML from the input if {@link BasicSupport#isHtml()} is
* true.
*
* @param input
* the input
*
* @return the no html version if needed
*/
private String ifUnhtml(String input) {
if (isHtml() && input != null) {
return StringUtils.unhtml(input);
}
return input;
}
/**
* Return a {@link BasicSupport} implementation supporting the given
* resource if possible.
*
* @param url
* the story resource
*
* @return an implementation that supports it, or NULL
*/
public static BasicSupport getSupport(URL url) {
if (url == null) {
return null;
}
// TEXT and INFO_TEXT always support files (not URLs though)
for (SupportType type : SupportType.values()) {
if (type != SupportType.TEXT && type != SupportType.INFO_TEXT) {
BasicSupport support = getSupport(type);
if (support != null && support.supports(url)) {
return support;
}
}
}
for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
SupportType.TEXT }) {
BasicSupport support = getSupport(type);
if (support != null && support.supports(url)) {
return support;
}
}
return null;
}
/**
* Return a {@link BasicSupport} implementation supporting the given type.
*
* @param type
* the type
*
* @return an implementation that supports it, or NULL
*/
public static BasicSupport getSupport(SupportType type) {
switch (type) {
case EPUB:
return new Epub().setType(type);
case INFO_TEXT:
return new InfoText().setType(type);
case FIMFICTION:
return new Fimfiction().setType(type);
case FANFICTION:
return new Fanfiction().setType(type);
case TEXT:
return new Text().setType(type);
case MANGAFOX:
return new MangaFox().setType(type);
case E621:
return new E621().setType(type);
case YIFFSTAR:
return new YiffStar().setType(type);
case E_HENTAI:
return new EHentai().setType(type);
case CBZ:
return new Cbz().setType(type);
case HTML:
return new Html().setType(type);
}
return null;
}
/**
* Return the first line from the given input which correspond to the given
* selectors.
*
* @param in
* the input
* @param needle
* a string that must be found inside the target line (also
* supports "^" at start to say "only if it starts with" the
* needle)
* @param relativeLine
* the line to return based upon the target line position (-1 =
* the line before, 0 = the target line...)
*
* @return the line
*/
static String getLine(InputStream in, String needle, int relativeLine) {
return getLine(in, needle, relativeLine, true);
}
/**
* Return a line from the given input which correspond to the given
* selectors.
*
* @param in
* the input
* @param needle
* a string that must be found inside the target line (also
* supports "^" at start to say "only if it starts with" the
* needle)
* @param relativeLine
* the line to return based upon the target line position (-1 =
* the line before, 0 = the target line...)
* @param first
* takes the first result (as opposed to the last one, which will
* also always spend the input)
*
* @return the line
*/
static String getLine(InputStream in, String needle, int relativeLine,
boolean first) {
String rep = null;
try {
in.reset();
} catch (IOException e) {
Instance.syserr(e);
}
List lines = new ArrayList();
@SuppressWarnings("resource")
Scanner scan = new Scanner(in, "UTF-8");
int index = -1;
scan.useDelimiter("\\n");
while (scan.hasNext()) {
lines.add(scan.next());
if (index == -1) {
if (needle.startsWith("^")) {
if (lines.get(lines.size() - 1).startsWith(
needle.substring(1))) {
index = lines.size() - 1;
}
} else {
if (lines.get(lines.size() - 1).contains(needle)) {
index = lines.size() - 1;
}
}
}
if (index >= 0 && index + relativeLine < lines.size()) {
rep = lines.get(index + relativeLine);
if (first) {
break;
}
}
}
return rep;
}
/**
* Return the text between the key and the endKey (and optional subKey can
* be passed, in this case we will look for the key first, then take the
* text between the subKey and the endKey).
*
* Will only match the first line with the given key if more than one are
* possible. Which also means that if the subKey or endKey is not found on
* that line, NULL will be returned.
*
* @param in
* the input
* @param key
* the key to match (also supports "^" at start to say
* "only if it starts with" the key)
* @param subKey
* the sub key or NULL if none
* @param endKey
* the end key or NULL for "up to the end"
* @return the text or NULL if not found
*/
static String getKeyLine(InputStream in, String key, String subKey,
String endKey) {
String result = null;
String line = getLine(in, key, 0);
if (line != null && line.contains(key)) {
line = line.substring(line.indexOf(key) + key.length());
if (subKey == null || subKey.isEmpty() || line.contains(subKey)) {
if (subKey != null) {
line = line.substring(line.indexOf(subKey)
+ subKey.length());
}
if (endKey == null || line.contains(endKey)) {
if (endKey != null) {
line = line.substring(0, line.indexOf(endKey));
result = line;
}
}
}
}
return result;
}
}