X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Ffanfix%2Fsupported%2FBasicSupport.java;h=2b4715ab0beec0af04f5d52a70f88f55a61193fd;hb=2284842831ea46e89b97dd22b6e294caad361f30;hp=be198416cb2459166c66bf84222164606c097b36;hpb=3b2b638f7e1395702f843b5b19d7959327f604b2;p=fanfix.git
diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport.java b/src/be/nikiroo/fanfix/supported/BasicSupport.java
index be19841..2b4715a 100644
--- a/src/be/nikiroo/fanfix/supported/BasicSupport.java
+++ b/src/be/nikiroo/fanfix/supported/BasicSupport.java
@@ -1,10 +1,12 @@
package be.nikiroo.fanfix.supported;
import java.awt.image.BufferedImage;
+import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
+import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
@@ -58,7 +60,9 @@ public abstract class BasicSupport {
/** Furry website with comics support */
E621,
/** CBZ files */
- CBZ;
+ CBZ,
+ /** HTML files */
+ HTML;
/**
* A description of this support type (more information than the
@@ -146,7 +150,7 @@ public abstract class BasicSupport {
private InputStream in;
private SupportType type;
- private URL currentReferer; // with on 'r', as in 'HTTP'...
+ private URL currentReferer; // with only one 'r', as in 'HTTP'...
// quote chars
private char openQuote = Instance.getTrans().getChar(
@@ -283,7 +287,7 @@ public abstract class BasicSupport {
*/
protected Story processMeta(URL url, boolean close, boolean getDesc)
throws IOException {
- in = Instance.getCache().open(url, this, false);
+ in = openInput(url);
if (in == null) {
return null;
}
@@ -510,90 +514,137 @@ public abstract class BasicSupport {
Chapter chap = new Chapter(number, chapterName);
- if (content == null) {
- return chap;
+ if (content != null) {
+ chap.setParagraphs(makeParagraphs(source, content));
}
+ return chap;
+
+ }
+
+ /**
+ * Convert the given content into {@link Paragraph}s (text and images).
+ *
+ * @param source
+ * the source URL of the story, passed to getImageUrl for "[...]" lines
+ * @param content
+ * the textual content (must not be NULL), read as UTF-8 lines
+ *
+ * @return the {@link Paragraph}s, after requotify and fixBlanksBreaks
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ protected List makeParagraphs(URL source, String content)
+ throws IOException {
if (isHtml()) {
// Special
processing:
content = content.replaceAll("(
]*>)|(
)|(
)",
"\n* * *\n");
}
+ List paras = new ArrayList();
InputStream in = new ByteArrayInputStream(content.getBytes("UTF-8"));
try {
- @SuppressWarnings("resource")
- Scanner scan = new Scanner(in, "UTF-8");
- scan.useDelimiter("(\\n|
)"); // \n for test, for html
-
- List paras = new ArrayList();
- while (scan.hasNext()) {
- String line = scan.next().trim();
- boolean image = false;
- if (line.startsWith("[") && line.endsWith("]")) {
- URL url = getImageUrl(this, source,
- line.substring(1, line.length() - 1).trim());
- if (url != null) {
- paras.add(new Paragraph(url));
- image = true;
- }
+ BufferedReader buff = new BufferedReader(new InputStreamReader(in,
+ "UTF-8"));
+
+ for (String encodedLine = buff.readLine(); encodedLine != null; encodedLine = buff
+ .readLine()) {
+ String lines[];
+ if (isHtml()) {
+ lines = encodedLine.split("(|
|
|
|\\n)");
+ } else {
+ lines = new String[] { encodedLine };
}
- if (!image) {
- paras.add(processPara(line));
+ for (String aline : lines) {
+ String line = aline.trim();
+
+ URL image = null;
+ if (line.startsWith("[") && line.endsWith("]")) {
+ image = getImageUrl(this, source,
+ line.substring(1, line.length() - 1).trim());
+ }
+
+ if (image != null) {
+ paras.add(new Paragraph(image));
+ } else {
+ paras.add(processPara(line));
+ }
}
}
+ } finally {
+ in.close();
+ }
- // Check quotes for "bad" format
- List newParas = new ArrayList();
- for (Paragraph para : paras) {
- newParas.addAll(requotify(para));
- }
- paras = newParas;
-
- // Remove double blanks/brks
- boolean space = false;
- boolean brk = true;
- for (int i = 0; i < paras.size(); i++) {
- Paragraph para = paras.get(i);
- boolean thisSpace = para.getType() == ParagraphType.BLANK;
- boolean thisBrk = para.getType() == ParagraphType.BREAK;
-
- if (space && thisBrk) {
- paras.remove(i - 1);
- i--;
- } else if ((space || brk) && (thisSpace || thisBrk)) {
- paras.remove(i);
- i--;
- }
+ // Check quotes for "bad" format
+ List newParas = new ArrayList();
+ for (Paragraph para : paras) {
+ newParas.addAll(requotify(para));
+ }
+ paras = newParas;
- space = thisSpace;
- brk = thisBrk;
- }
+ // Remove double blanks/brks
+ fixBlanksBreaks(paras);
- // Remove blank/brk at start
- if (paras.size() > 0
- && (paras.get(0).getType() == ParagraphType.BLANK || paras
- .get(0).getType() == ParagraphType.BREAK)) {
- paras.remove(0);
- }
+ return paras;
+ }
- // Remove blank/brk at end
- int last = paras.size() - 1;
- if (paras.size() > 0
- && (paras.get(last).getType() == ParagraphType.BLANK || paras
- .get(last).getType() == ParagraphType.BREAK)) {
- paras.remove(last);
+ /**
+ * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
+ * those {@link Paragraph}s (the given list is modified in place).
+ * <p>
+ * The resulting list will not contain a starting or trailing blank/break
+ * nor 2 blanks or breaks in a row (a blank before a break is dropped).
+ *
+ * @param paras
+ * the list of {@link Paragraph}s to fix, modified in place
+ */
+ protected void fixBlanksBreaks(List paras) {
+ boolean space = false;
+ boolean brk = true;
+ for (int i = 0; i < paras.size(); i++) {
+ Paragraph para = paras.get(i);
+ boolean thisSpace = para.getType() == ParagraphType.BLANK;
+ boolean thisBrk = para.getType() == ParagraphType.BREAK;
+
+ if (i > 0 && space && thisBrk) {
+ paras.remove(i - 1);
+ i--;
+ } else if ((space || brk) && (thisSpace || thisBrk)) {
+ paras.remove(i);
+ i--;
}
- chap.setParagraphs(paras);
+ space = thisSpace;
+ brk = thisBrk;
+ }
- return chap;
- } finally {
- in.close();
+ // Remove blank/brk at start
+ if (paras.size() > 0
+ && (paras.get(0).getType() == ParagraphType.BLANK || paras.get(
+ 0).getType() == ParagraphType.BREAK)) {
+ paras.remove(0);
+ }
+
+ // Remove blank/brk at end
+ int last = paras.size() - 1;
+ if (paras.size() > 0
+ && (paras.get(last).getType() == ParagraphType.BLANK || paras
+ .get(last).getType() == ParagraphType.BREAK)) {
+ paras.remove(last);
}
}
+ /**
+ * Get the default cover related to this subject (see .info files).
+ *
+ * @param subject
+ * the subject
+ *
+ * @return the cover if any, or NULL
+ */
static BufferedImage getDefaultCover(String subject) {
if (subject != null && !subject.isEmpty()
&& Instance.getCoverDir() != null) {
@@ -716,6 +767,21 @@ public abstract class BasicSupport {
return url;
}
+ /**
+ * Open the input stream that will be used through the support.
+ *
+ * @param source
+ * the source {@link URL}, handed over to the cache to be opened
+ *
+ * @return the {@link InputStream} from the cache (processMeta checks it for NULL)
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ protected InputStream openInput(URL source) throws IOException {
+ return Instance.getCache().open(source, this, false);
+ }
+
protected InputStream reset(InputStream in) {
try {
in.reset();
@@ -772,7 +838,7 @@ public abstract class BasicSupport {
*
* @return the correctly (or so we hope) quotified paragraphs
*/
- private List requotify(Paragraph para) {
+ protected List requotify(Paragraph para) {
List newParas = new ArrayList();
if (para.getType() == ParagraphType.QUOTE
@@ -847,7 +913,7 @@ public abstract class BasicSupport {
*
* @return the processed {@link Paragraph}
*/
- private Paragraph processPara(String line) {
+ protected Paragraph processPara(String line) {
line = ifUnhtml(line).trim();
boolean space = true;
@@ -870,11 +936,16 @@ public abstract class BasicSupport {
if (tentativeCloseQuote) {
tentativeCloseQuote = false;
- if ((car >= 'a' && car <= 'z') || (car >= 'A' && car <= 'Z')
- || (car >= '0' && car <= '9')) {
+ if (Character.isLetterOrDigit(car)) {
builder.append("'");
} else {
- builder.append(closeQuote);
+ // handle double-single quotes as double quotes
+ if (prev == car) {
+ builder.append(closeDoubleQuote);
+ continue;
+ } else {
+ builder.append(closeQuote);
+ }
}
}
@@ -890,9 +961,21 @@ public abstract class BasicSupport {
case '\'':
if (space || (brk && quote)) {
quote = true;
- builder.append(openQuote);
- } else if (prev == ' ') {
- builder.append(openQuote);
+ // handle double-single quotes as double quotes
+ if (prev == car) {
+ builder.deleteCharAt(builder.length() - 1);
+ builder.append(openDoubleQuote);
+ } else {
+ builder.append(openQuote);
+ }
+ } else if (prev == ' ' || prev == car) {
+ // handle double-single quotes as double quotes
+ if (prev == car) {
+ builder.deleteCharAt(builder.length() - 1);
+ builder.append(openDoubleQuote);
+ } else {
+ builder.append(openQuote);
+ }
} else {
// it is a quote ("I'm off") or a 'quote' ("This
// 'good' restaurant"...)
@@ -945,7 +1028,13 @@ public abstract class BasicSupport {
quote = true;
builder.append(openQuote);
} else {
- builder.append(openQuote);
+ // handle double-single quotes as double quotes
+ if (prev == car) {
+ builder.deleteCharAt(builder.length() - 1);
+ builder.append(openDoubleQuote);
+ } else {
+ builder.append(openQuote);
+ }
}
space = false;
brk = false;
@@ -958,7 +1047,13 @@ public abstract class BasicSupport {
case 'ã':
space = false;
brk = false;
- builder.append(closeQuote);
+ // handle double-single quotes as double quotes
+ if (prev == car) {
+ builder.deleteCharAt(builder.length() - 1);
+ builder.append(closeDoubleQuote);
+ } else {
+ builder.append(closeQuote);
+ }
break;
case '«':
@@ -1056,8 +1151,8 @@ public abstract class BasicSupport {
}
}
- for (SupportType type : new SupportType[] { SupportType.TEXT,
- SupportType.INFO_TEXT }) {
+ for (SupportType type : new SupportType[] { SupportType.INFO_TEXT,
+ SupportType.TEXT }) {
BasicSupport support = getSupport(type);
if (support != null && support.supports(url)) {
return support;
@@ -1093,6 +1188,8 @@ public abstract class BasicSupport {
return new E621().setType(type);
case CBZ:
return new Cbz().setType(type);
+ case HTML:
+ return new Html().setType(type);
}
return null;