X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Ffanfix%2Fsupported%2FBasicSupport.java;h=e3b6fab345faa1886e491f6d22742624d6415c19;hb=754a5bc205f6a50f3fe3fe7c2dfb09d8a8dd09bb;hp=b6fd1e277d67c3facbba0738b45f096f49d4734c;hpb=68e370a441d8e6b10bfaa904ecacb29e7d6160d8;p=fanfix.git diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport.java b/src/be/nikiroo/fanfix/supported/BasicSupport.java index b6fd1e2..e3b6fab 100644 --- a/src/be/nikiroo/fanfix/supported/BasicSupport.java +++ b/src/be/nikiroo/fanfix/supported/BasicSupport.java @@ -10,6 +10,7 @@ import java.io.InputStreamReader; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; +import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -59,8 +60,12 @@ public abstract class BasicSupport { MANGAFOX, /** Furry website with comics support */ E621, + /** Furry website with stories */ + YIFFSTAR, /** CBZ files */ - CBZ; + CBZ, + /** HTML files */ + HTML; /** * A description of this support type (more information than the @@ -148,7 +153,7 @@ public abstract class BasicSupport { private InputStream in; private SupportType type; - private URL currentReferer; // with on 'r', as in 'HTTP'... + private URL currentReferer; // with only one 'r', as in 'HTTP'... // quote chars private char openQuote = Instance.getTrans().getChar( @@ -239,6 +244,16 @@ public abstract class BasicSupport { protected abstract String getChapterContent(URL source, InputStream in, int number) throws IOException; + /** + * Log into the support (can be a no-op depending upon the support). + * + * @throws IOException + * in case of I/O error + */ + public void login() throws IOException { + + } + /** * Return the list of cookies (values included) that must be used to * correctly fetch the resources. @@ -247,11 +262,29 @@ public abstract class BasicSupport { * it. * * @return the cookies + * + * @throws IOException + * in case of I/O error */ - public Map getCookies() { + public Map getCookies() throws IOException { return new HashMap(); } + /** + * Return the canonical form of the main {@link URL}. + * + * @param source + * the source {@link URL} + * + * @return the canonical form of this {@link URL} + * + * @throws IOException + * in case of I/O error + */ + public URL getCanonicalUrl(URL source) throws IOException { + return source; + } + /** * Process the given story resource into a partially filled {@link Story} * object containing the name and metadata, except for the description. @@ -285,7 +318,13 @@ public abstract class BasicSupport { */ protected Story processMeta(URL url, boolean close, boolean getDesc) throws IOException { - in = Instance.getCache().open(url, this, false); + login(); + + url = getCanonicalUrl(url); + + setCurrentReferer(url); + + in = openInput(url); if (in == null) { return null; } @@ -295,6 +334,10 @@ public abstract class BasicSupport { Story story = new Story(); MetaData meta = getMeta(url, getInput()); + if (meta.getCreationDate() == null + || meta.getCreationDate().isEmpty()) { + meta.setCreationDate(StringUtils.fromTime(new Date().getTime())); + } story.setMeta(meta); if (meta != null && meta.getCover() == null) { @@ -322,6 +365,8 @@ public abstract class BasicSupport { in.close(); } } + + setCurrentReferer(null); } } @@ -346,8 +391,7 @@ public abstract class BasicSupport { pg.setMinMax(0, 100); } - setCurrentReferer(url); - + url = getCanonicalUrl(url); pg.setProgress(1); try { Story story = processMeta(url, false, true); @@ -357,6 +401,10 @@ public abstract class BasicSupport { return null; } + pg.setName("Retrieving " + story.getMeta().getTitle()); + + setCurrentReferer(url); + story.setChapters(new ArrayList()); List> chapters = getChapters(url, getInput()); @@ -367,14 +415,19 @@ public abstract class BasicSupport { Progress pgChaps = new Progress(0, chapters.size()); pg.addProgress(pgChaps, 80); + long words = 0; for (Entry chap : chapters) { setCurrentReferer(chap.getValue()); InputStream chapIn = Instance.getCache().open( chap.getValue(), this, true); try { - story.getChapters().add( - makeChapter(url, i, chap.getKey(), - getChapterContent(url, chapIn, i))); + Chapter cc = makeChapter(url, i, chap.getKey(), + getChapterContent(url, chapIn, i)); + words += cc.getWords(); + story.getChapters().add(cc); + if (story.getMeta() != null) { + story.getMeta().setWords(words); + } } finally { chapIn.close(); } @@ -398,12 +451,12 @@ public abstract class BasicSupport { in.close(); } - currentReferer = null; + setCurrentReferer(null); } } /** - * The support type.$ + * The support type. * * @return the type */ @@ -513,7 +566,13 @@ public abstract class BasicSupport { Chapter chap = new Chapter(number, chapterName); if (content != null) { - chap.setParagraphs(makeParagraphs(source, content)); + List paras = makeParagraphs(source, content); + long words = 0; + for (Paragraph para : paras) { + words += para.getWords(); + } + chap.setParagraphs(paras); + chap.setWords(words); } return chap; @@ -659,6 +718,11 @@ public abstract class BasicSupport { /** * Return the list of supported image extensions. * + * @param emptyAllowed + * TRUE to allow an empty extension on first place, which can be + * used when you may already have an extension in your input but + * are not sure about it + * * @return the extensions */ static String[] getImageExt(boolean emptyAllowed) { @@ -669,6 +733,18 @@ public abstract class BasicSupport { } } + /** + * Check if the given resource can be a local image or a remote image, then + * refresh the cache with it if it is. + * + * @param source + * the story source + * @param line + * the resource to check + * + * @return the image if found, or NULL + * + */ static BufferedImage getImage(BasicSupport support, URL source, String line) { URL url = getImageUrl(support, source, line); if (url != null) { @@ -765,6 +841,29 @@ public abstract class BasicSupport { return url; } + /** + * Open the input file that will be used through the support. + * + * @param source + * the source {@link URL} + * + * @return the {@link InputStream} + * + * @throws IOException + * in case of I/O error + */ + protected InputStream openInput(URL source) throws IOException { + return Instance.getCache().open(source, this, false); + } + + /** + * Reset the given {@link InputStream} and return it. + * + * @param in + * the {@link InputStream} to reset + * + * @return the same {@link InputStream} after reset + */ protected InputStream reset(InputStream in) { try { in.reset(); @@ -817,7 +916,7 @@ public abstract class BasicSupport { * paragraphs (quotes or not)). * * @param para - * the paragraph to requotify (not necessaraly a quote) + * the paragraph to requotify (not necessarily a quote) * * @return the correctly (or so we hope) quotified paragraphs */ @@ -848,7 +947,8 @@ public abstract class BasicSupport { if (!singleQ && !doubleQ) { line = openDoubleQuote + line + closeDoubleQuote; - newParas.add(new Paragraph(ParagraphType.QUOTE, line)); + newParas.add(new Paragraph(ParagraphType.QUOTE, line, para + .getWords())); } else { char open = singleQ ? openQuote : openDoubleQuote; char close = singleQ ? closeQuote : closeDoubleQuote; @@ -871,7 +971,13 @@ public abstract class BasicSupport { if (posDot >= 0) { String rest = line.substring(posDot + 1).trim(); line = line.substring(0, posDot + 1).trim(); - newParas.add(new Paragraph(ParagraphType.QUOTE, line)); + long words = 1; + for (char car : line.toCharArray()) { + if (car == ' ') { + words++; + } + } + newParas.add(new Paragraph(ParagraphType.QUOTE, line, words)); if (!rest.isEmpty()) { newParas.addAll(requotify(processPara(rest))); } @@ -896,7 +1002,7 @@ public abstract class BasicSupport { * * @return the processed {@link Paragraph} */ - private Paragraph processPara(String line) { + protected Paragraph processPara(String line) { line = ifUnhtml(line).trim(); boolean space = true; @@ -905,6 +1011,7 @@ public abstract class BasicSupport { boolean tentativeCloseQuote = false; char prev = '\0'; int dashCount = 0; + long words = 1; StringBuilder builder = new StringBuilder(); for (char car : line.toCharArray()) { @@ -919,11 +1026,16 @@ public abstract class BasicSupport { if (tentativeCloseQuote) { tentativeCloseQuote = false; - if ((car >= 'a' && car <= 'z') || (car >= 'A' && car <= 'Z') - || (car >= '0' && car <= '9')) { + if (Character.isLetterOrDigit(car)) { builder.append("'"); } else { - builder.append(closeQuote); + // handle double-single quotes as double quotes + if (prev == car) { + builder.append(closeDoubleQuote); + continue; + } else { + builder.append(closeQuote); + } } } @@ -933,15 +1045,31 @@ public abstract class BasicSupport { case '\t': case '\n': // just in case case '\r': // just in case + if (builder.length() > 0 + && builder.charAt(builder.length() - 1) != ' ') { + words++; + } builder.append(' '); break; case '\'': if (space || (brk && quote)) { quote = true; - builder.append(openQuote); - } else if (prev == ' ') { - builder.append(openQuote); + // handle double-single quotes as double quotes + if (prev == car) { + builder.deleteCharAt(builder.length() - 1); + builder.append(openDoubleQuote); + } else { + builder.append(openQuote); + } + } else if (prev == ' ' || prev == car) { + // handle double-single quotes as double quotes + if (prev == car) { + builder.deleteCharAt(builder.length() - 1); + builder.append(openDoubleQuote); + } else { + builder.append(openQuote); + } } else { // it is a quote ("I'm off") or a 'quote' ("This // 'good' restaurant"...) @@ -994,7 +1122,13 @@ public abstract class BasicSupport { quote = true; builder.append(openQuote); } else { - builder.append(openQuote); + // handle double-single quotes as double quotes + if (prev == car) { + builder.deleteCharAt(builder.length() - 1); + builder.append(openDoubleQuote); + } else { + builder.append(openQuote); + } } space = false; brk = false; @@ -1007,7 +1141,13 @@ public abstract class BasicSupport { case '」': space = false; brk = false; - builder.append(closeQuote); + // handle double-single quotes as double quotes + if (prev == car) { + builder.deleteCharAt(builder.length() - 1); + builder.append(closeDoubleQuote); + } else { + builder.append(closeQuote); + } break; case '«': @@ -1061,11 +1201,11 @@ public abstract class BasicSupport { type = ParagraphType.QUOTE; } - return new Paragraph(type, line); + return new Paragraph(type, line, words); } /** - * Remove the HTML from the inpit if {@link BasicSupport#isHtml()} is + * Remove the HTML from the input if {@link BasicSupport#isHtml()} is * true. * * @param input @@ -1105,8 +1245,8 @@ public abstract class BasicSupport { } } - for (SupportType type : new SupportType[] { SupportType.TEXT, - SupportType.INFO_TEXT }) { + for (SupportType type : new SupportType[] { SupportType.INFO_TEXT, + SupportType.TEXT }) { BasicSupport support = getSupport(type); if (support != null && support.supports(url)) { return support; @@ -1140,8 +1280,12 @@ public abstract class BasicSupport { return new MangaFox().setType(type); case E621: return new E621().setType(type); + case YIFFSTAR: + return new YiffStar().setType(type); case CBZ: return new Cbz().setType(type); + case HTML: + return new Html().setType(type); } return null;