package be.nikiroo.fanfix.supported; import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.net.URL; import java.util.ArrayList; import java.util.List; import be.nikiroo.fanfix.Instance; import be.nikiroo.fanfix.bundles.Config; import be.nikiroo.fanfix.bundles.StringId; import be.nikiroo.fanfix.data.Chapter; import be.nikiroo.fanfix.data.Paragraph; import be.nikiroo.fanfix.data.Paragraph.ParagraphType; import be.nikiroo.utils.Image; import be.nikiroo.utils.Progress; import be.nikiroo.utils.StringUtils; /** * Helper class for {@link BasicSupport}, mostly dedicated to {@link Paragraph} * and text formating for the {@link BasicSupport} class. * * @author niki */ public class BasicSupportPara { // quote chars private static char openQuote = Instance.getInstance().getTrans().getCharacter(StringId.OPEN_SINGLE_QUOTE); private static char closeQuote = Instance.getInstance().getTrans().getCharacter(StringId.CLOSE_SINGLE_QUOTE); private static char openDoubleQuote = Instance.getInstance().getTrans().getCharacter(StringId.OPEN_DOUBLE_QUOTE); private static char closeDoubleQuote = Instance.getInstance().getTrans().getCharacter(StringId.CLOSE_DOUBLE_QUOTE); // used by this class: BasicSupportHelper bsHelper; BasicSupportImages bsImages; /** * Create a new {@link BasicSupportPara}. *
* Note that you need an instance of both {@link BasicSupportHelper} and
* {@link BasicSupportImages} for it to work.
*
* @param bsHelper
* the required {@link BasicSupportHelper}
* @param bsImages
* the required {@link BasicSupportImages}
*/
public BasicSupportPara(BasicSupportHelper bsHelper, BasicSupportImages bsImages) {
// Keep the two collaborators exactly as given: they are stored by
// reference, without any copy or NULL check at this point.
this.bsHelper = bsHelper;
this.bsImages = bsImages;
}
/**
* Create a {@link Chapter} object from the given information, formatting
* the content as it should be.
*
* @param support
* the linked {@link BasicSupport} (can be NULL)
* @param source
* the source of the story (for image lookup in the same path if
* the source is a file, can be NULL)
* @param number
* the chapter number
* @param name
* the chapter name
* @param content
* the chapter content
* @param pg
* the optional progress reporter
* @param html
* TRUE if the input content is in HTML mode
*
* @return the {@link Chapter}, never NULL
*
* @throws IOException
* in case of I/O error
*/
public Chapter makeChapter(BasicSupport support, URL source,
int number, String name, String content, boolean html, Progress pg)
throws IOException {
// Chapter name: process it correctly, then remove the possible
// redundant "Chapter x: " in front of it, or "-" (as in
// "Chapter 5: - Fun!" after the ": " was automatically added)
String chapterName = processPara(name, false)
.getContent().trim();
for (String lang : Instance.getInstance().getConfig().getList(Config.CONF_CHAPTER)) {
String chapterWord = Instance.getInstance().getConfig().getStringX(Config.CONF_CHAPTER, lang);
if (chapterName.startsWith(chapterWord)) {
chapterName = chapterName.substring(chapterWord.length())
.trim();
break;
}
}
if (chapterName.startsWith(Integer.toString(number))) {
chapterName = chapterName.substring(
Integer.toString(number).length()).trim();
}
while (chapterName.startsWith(":") || chapterName.startsWith("-")) {
chapterName = chapterName.substring(1).trim();
}
//
Chapter chap = new Chapter(number, chapterName);
if (content != null) {
List
* Will also fix quotes and HTML encoding if needed.
*
* @param line
* the raw line
* @param html
* TRUE if the input content is in HTML mode
*
* @return the processed {@link Paragraph}, never NULL
*/
protected Paragraph processPara(String line, boolean html) {
	// Be lenient with a missing line: handle it as an empty one so that the
	// "never NULL" contract holds instead of failing on toCharArray().
	if (line == null) {
		line = "";
	}

	if (html) {
		line = StringUtils.unhtml(line).trim();
	}

	// Scanner state:
	// - space: only white space and straight quotes (' or ") were seen
	// - brk: nothing but white space, straight quotes, dashes and
	// separator symbols (* ~ / \ < > = + _ and the long dashes) was seen
	// - quote: the line opens with a quote character or with a dash
	// - tentativeCloseQuote: the previous character was a straight ' that
	// is either an apostrophe or a closing quote; the current character
	// decides which one
	// - prev: previous character (any white space is remembered as ' ')
	// - dashCount: length of the pending run of '-' not yet written
	// - words: starts at 1, +1 for each white space that follows a
	// non-space character in the output
	boolean space = true;
	boolean brk = true;
	boolean quote = false;
	boolean tentativeCloseQuote = false;
	char prev = '\0';
	int dashCount = 0;
	long words = 1;

	StringBuilder builder = new StringBuilder();
	for (char car : line.toCharArray()) {
		// A run of '-' ends with the first other character: write it now
		if (car != '-') {
			if (dashCount > 0) {
				appendDashes(builder, dashCount);
			}
			dashCount = 0;
		}

		if (tentativeCloseQuote) {
			tentativeCloseQuote = false;
			if (Character.isLetterOrDigit(car)) {
				// apostrophe inside a word ("I'm off")
				builder.append("'");
			} else {
				// handle double-single quotes as double quotes
				if (prev == car) {
					builder.append(closeDoubleQuote);
					// nothing else to do for this character (prev
					// already holds the same quote character)
					continue;
				}

				builder.append(closeQuote);
			}
		}

		// What the next iteration will see as its previous character
		char seen = car;

		switch (car) {
		case '\u00A0': // non-breaking space
		case ' ':
		case '\t':
		case '\n': // just in case
		case '\r': // just in case
			if (builder.length() > 0
					&& builder.charAt(builder.length() - 1) != ' ') {
				words++;
			}
			builder.append(' ');
			// All white space is written as a plain space, so the
			// "prev == ' '" tests on quotes must see it as one, too
			seen = ' ';
			break;

		case '\'':
			if (space || (brk && quote)) {
				quote = true;
				// handle double-single quotes as double quotes
				if (prev == car) {
					builder.deleteCharAt(builder.length() - 1);
					builder.append(openDoubleQuote);
				} else {
					builder.append(openQuote);
				}
			} else if (prev == ' ' || prev == car) {
				// handle double-single quotes as double quotes
				if (prev == car) {
					builder.deleteCharAt(builder.length() - 1);
					builder.append(openDoubleQuote);
				} else {
					builder.append(openQuote);
				}
			} else {
				// it is a quote ("I'm off") or a 'quote' ("This
				// 'good' restaurant"...)
				tentativeCloseQuote = true;
			}
			break;

		case '"':
			if (space || (brk && quote)) {
				quote = true;
				builder.append(openDoubleQuote);
			} else if (prev == ' ') {
				builder.append(openDoubleQuote);
			} else {
				builder.append(closeDoubleQuote);
			}
			break;

		case '-':
			if (space) {
				// A dash before any content marks the line as a quote
				// and is not written to the output
				quote = true;
			} else {
				dashCount++;
			}
			space = false;
			break;

		case '*':
		case '~':
		case '/':
		case '\\':
		case '<':
		case '>':
		case '=':
		case '+':
		case '_':
		case '–':
		case '—':
			// separator symbols: kept as-is, the line can still be a
			// break
			space = false;
			builder.append(car);
			break;

		case '‘':
		case '`':
		case '‹':
		case '﹁':
		case '〈':
		case '「':
			if (space || (brk && quote)) {
				quote = true;
				builder.append(openQuote);
			} else {
				// handle double-single quotes as double quotes
				if (prev == car) {
					builder.deleteCharAt(builder.length() - 1);
					builder.append(openDoubleQuote);
				} else {
					builder.append(openQuote);
				}
			}
			space = false;
			brk = false;
			break;

		case '’':
		case '›':
		case '﹂':
		case '〉':
		case '」':
			space = false;
			brk = false;
			// handle double-single quotes as double quotes
			if (prev == car) {
				builder.deleteCharAt(builder.length() - 1);
				builder.append(closeDoubleQuote);
			} else {
				builder.append(closeQuote);
			}
			break;

		case '«':
		case '“':
		case '﹃':
		case '《':
		case '『':
			if (space || (brk && quote)) {
				quote = true;
			}
			builder.append(openDoubleQuote);
			space = false;
			brk = false;
			break;

		case '»':
		case '”':
		case '﹄':
		case '》':
		case '』':
			space = false;
			brk = false;
			builder.append(closeDoubleQuote);
			break;

		default:
			space = false;
			brk = false;
			builder.append(car);
			break;
		}

		prev = seen;
	}

	// A straight ' at the very end cannot be an apostrophe inside a word
	if (tentativeCloseQuote) {
		builder.append(closeQuote);
	}

	// Dashes that end a line of text have no following character to
	// trigger their output in the loop: write them here or they are lost.
	// Lines made only of separators (brk still set) keep the content they
	// always had.
	if (dashCount > 0 && !brk) {
		appendDashes(builder, dashCount);
	}

	line = builder.toString().trim();

	ParagraphType type = ParagraphType.NORMAL;
	if (space) {
		type = ParagraphType.BLANK;
	} else if (brk) {
		type = ParagraphType.BREAK;
	} else if (quote) {
		type = ParagraphType.QUOTE;
	}

	return new Paragraph(type, line, words);
}

/**
 * Write a run of consecutive '-' characters in its output form: a single
 * dash is kept as a hyphen, a longer run becomes one em dash.
 *
 * @param builder
 *            the output to append to
 * @param dashCount
 *            the number of consecutive '-' in the run (must be at least 1)
 */
private static void appendDashes(StringBuilder builder, int dashCount) {
	builder.append(dashCount == 1 ? '-' : '—');
}
/**
* Convert the given content into {@link Paragraph}s.
*
* @param support
* the linked {@link BasicSupport} (can be NULL), used to
* download optional image content in []
* @param source
* the source URL of the story (for image lookup in the same path
* if the source is a file, can be NULL)
* @param content
* the textual content
* @param html
* TRUE if the input content is in HTML mode
* @param pg
* the optional progress reporter
*
* @return the {@link Paragraph}s (can be empty but never NULL)
*
* @throws IOException
* in case of I/O error
*/
protected List |
* The resulting list will not contain a starting or trailing blank/break
* nor 2 blanks or breaks following each other.
*
* @param paras
* the list of {@link Paragraph}s to fix
*/
protected void fixBlanksBreaks(List
processing:
content = content.replaceAll("(
]*>)|(
)|(
)",
"
* * *
");
}
List
|
)");
pg.setMinMax(0, tab.length);
int i = 1;
for (String line : tab) {
if (line.startsWith("[") && line.endsWith("]")) {
pg.setName("Extracting image " + i);
}
paras.add(makeParagraph(support, source, line.trim(), html));
pg.setProgress(i++);
}
} else {
List