From 68686a37a591a767f6d1af428ea0d5f3d3a1ddc1 Mon Sep 17 00:00:00 2001 From: Niki Roo Date: Sun, 12 Feb 2017 22:15:39 +0100 Subject: [PATCH] Library scanning much quicker BasicSupport rewrite to allow quicker scanning of library items --- src/be/nikiroo/fanfix/Instance.java | 8 +- src/be/nikiroo/fanfix/Library.java | 49 +- src/be/nikiroo/fanfix/Main.java | 2 +- src/be/nikiroo/fanfix/output/BasicOutput.java | 4 +- .../fanfix/supported/BasicSupport.java | 553 ++++++------------ src/be/nikiroo/fanfix/supported/Cbz.java | 16 +- src/be/nikiroo/fanfix/supported/E621.java | 44 +- src/be/nikiroo/fanfix/supported/Epub.java | 193 ++---- .../nikiroo/fanfix/supported/Fanfiction.java | 54 +- .../nikiroo/fanfix/supported/Fimfiction.java | 51 +- .../nikiroo/fanfix/supported/InfoReader.java | 123 ++++ src/be/nikiroo/fanfix/supported/InfoText.java | 220 +------ src/be/nikiroo/fanfix/supported/MangaFox.java | 56 +- src/be/nikiroo/fanfix/supported/Text.java | 74 +-- 14 files changed, 571 insertions(+), 876 deletions(-) create mode 100644 src/be/nikiroo/fanfix/supported/InfoReader.java diff --git a/src/be/nikiroo/fanfix/Instance.java b/src/be/nikiroo/fanfix/Instance.java index 2289445..ae96d48 100644 --- a/src/be/nikiroo/fanfix/Instance.java +++ b/src/be/nikiroo/fanfix/Instance.java @@ -55,7 +55,13 @@ public class Instance { } trans = new StringIdBundle(getLang()); - lib = new Library(getFile(Config.LIBRARY_DIR)); + try { + lib = new Library(getFile(Config.LIBRARY_DIR)); + } catch (Exception e) { + syserr(new IOException("Cannot create library for directory: " + + getFile(Config.LIBRARY_DIR), e)); + } + debug = Instance.getConfig().getBoolean(Config.DEBUG_ERR, false); coverDir = getFile(Config.DEFAULT_COVERS_DIR); File tmp = getFile(Config.CACHE_DIR); diff --git a/src/be/nikiroo/fanfix/Library.java b/src/be/nikiroo/fanfix/Library.java index a120df1..1b9419a 100644 --- a/src/be/nikiroo/fanfix/Library.java +++ b/src/be/nikiroo/fanfix/Library.java @@ -9,13 +9,13 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; -import be.nikiroo.fanfix.bundles.Config; import be.nikiroo.fanfix.data.MetaData; import be.nikiroo.fanfix.data.Story; import be.nikiroo.fanfix.output.BasicOutput; import be.nikiroo.fanfix.output.BasicOutput.OutputType; import be.nikiroo.fanfix.supported.BasicSupport; import be.nikiroo.fanfix.supported.BasicSupport.SupportType; +import be.nikiroo.fanfix.supported.InfoReader; /** * Manage a library of Stories: import, export, list. @@ -224,46 +224,27 @@ public class Library { private Map getStories() { if (stories.isEmpty()) { lastId = 0; - String format = "." - + Instance.getConfig().getString(Config.IMAGE_FORMAT_COVER) - .toLowerCase(); + for (File dir : baseDir.listFiles()) { if (dir.isDirectory()) { for (File file : dir.listFiles()) { try { - String path = file.getPath().toLowerCase(); - if (!path.endsWith(".info") - && !path.endsWith(format)) { - // TODO: export .info reading to a class and use - // it here - SupportType type = SupportType.INFO_TEXT; - if (path.toLowerCase().endsWith(".cbz")) { - type = SupportType.CBZ; - } - BasicSupport support = BasicSupport - .getSupport(type); - MetaData meta = support.processMeta( - file.toURI().toURL()).getMeta(); - if (meta != null) { - stories.put(meta, file); - try { - int id = Integer.parseInt(meta - .getLuid()); - if (id > lastId) { - lastId = id; - } - } catch (Exception e) { - // not normal!! - Instance.syserr(new IOException( - "Cannot understand the LUID of " - + file.getPath() + ": " - + meta.getLuid(), e)); + if (file.getPath().toLowerCase().endsWith(".info")) { + MetaData meta = InfoReader.readMeta(file); + try { + int id = Integer.parseInt(meta.getLuid()); + if (id > lastId) { + lastId = id; } - } else { + + stories.put(meta, file); + + } catch (Exception e) { // not normal!! Instance.syserr(new IOException( - "Cannot get metadata for: " - + file.getPath())); + "Cannot understand the LUID of " + + file.getPath() + ": " + + meta.getLuid(), e)); } } } catch (IOException e) { diff --git a/src/be/nikiroo/fanfix/Main.java b/src/be/nikiroo/fanfix/Main.java index 406e8a2..0938fea 100644 --- a/src/be/nikiroo/fanfix/Main.java +++ b/src/be/nikiroo/fanfix/Main.java @@ -64,7 +64,7 @@ public class Main { String target = null; MainAction action = MainAction.HELP; Boolean plusInfo = null; - + boolean noMoreActions = false; int exitCode = 0; diff --git a/src/be/nikiroo/fanfix/output/BasicOutput.java b/src/be/nikiroo/fanfix/output/BasicOutput.java index 8728284..e2bf2ff 100644 --- a/src/be/nikiroo/fanfix/output/BasicOutput.java +++ b/src/be/nikiroo/fanfix/output/BasicOutput.java @@ -241,9 +241,9 @@ public abstract class BasicOutput { imageName = paragraphNumber + "_" + chapterNameNum + ".png"; if (story.getMeta() != null) { - story.getMeta().setType(getType().toString()); + story.getMeta().setType("" + getType()); } - + if (writeCover) { InfoCover.writeCover(targetDir, targetName, story.getMeta()); } diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport.java b/src/be/nikiroo/fanfix/supported/BasicSupport.java index b951728..6d44c04 100644 --- a/src/be/nikiroo/fanfix/supported/BasicSupport.java +++ b/src/be/nikiroo/fanfix/supported/BasicSupport.java @@ -1,5 +1,6 @@ package be.nikiroo.fanfix.supported; +import java.awt.image.BufferedImage; import java.io.ByteArrayInputStream; import java.io.File; import java.io.IOException; @@ -13,6 +14,8 @@ import java.util.Map; import java.util.Map.Entry; import java.util.Scanner; +import javax.imageio.ImageIO; + import be.nikiroo.fanfix.Instance; import be.nikiroo.fanfix.bundles.Config; import be.nikiroo.fanfix.bundles.StringId; @@ -141,7 +144,6 @@ public abstract class BasicSupport { } } - /** Only used by {@link BasicSupport#getInput()} just so it is always reset. */ private InputStream in; private SupportType type; private URL currentReferer; // with on 'r', as in 'HTTP'... @@ -181,70 +183,7 @@ public abstract class BasicSupport { */ protected abstract boolean isHtml(); - /** - * Return the story title. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the title - * - * @throws IOException - * in case of I/O error - */ - protected abstract String getTitle(URL source, InputStream in) - throws IOException; - - /** - * Return the story author. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the author - * - * @throws IOException - * in case of I/O error - */ - protected abstract String getAuthor(URL source, InputStream in) - throws IOException; - - /** - * Return the story publication date. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the date - * - * @throws IOException - * in case of I/O error - */ - protected abstract String getDate(URL source, InputStream in) - throws IOException; - - /** - * Return the subject of the story (for instance, if it is a fanfiction, - * what is the original work; if it is a technical text, what is the - * technical subject...). - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the subject - * - * @throws IOException - * in case of I/O error - */ - protected abstract String getSubject(URL source, InputStream in) + protected abstract MetaData getMeta(URL source, InputStream in) throws IOException; /** @@ -263,24 +202,6 @@ public abstract class BasicSupport { protected abstract String getDesc(URL source, InputStream in) throws IOException; - /** - * Return the story cover resource if any, or NULL if none. - *

- * The default cover should not be checked for here. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the cover or NULL - * - * @throws IOException - * in case of I/O error - */ - protected abstract URL getCover(URL source, InputStream in) - throws IOException; - /** * Return the list of chapters (name and resource). * @@ -316,16 +237,6 @@ public abstract class BasicSupport { protected abstract String getChapterContent(URL source, InputStream in, int number) throws IOException; - /** - * Check if this {@link BasicSupport} is mainly catered to image files. - * - * @return TRUE if it is - */ - public boolean isImageDocument(URL source, InputStream in) - throws IOException { - return false; - } - /** * Return the list of cookies (values included) that must be used to * correctly fetch the resources. @@ -378,23 +289,15 @@ public abstract class BasicSupport { } try { - preprocess(getInput()); + preprocess(url, getInput()); Story story = new Story(); - story.setMeta(new MetaData()); - story.getMeta().setTitle(ifUnhtml(getTitle(url, getInput()))); - story.getMeta().setAuthor( - fixAuthor(ifUnhtml(getAuthor(url, getInput())))); - story.getMeta().setDate(ifUnhtml(getDate(url, getInput()))); - story.getMeta().setTags(getTags(url, getInput())); - story.getMeta().setSource(getSourceName()); - story.getMeta().setPublisher( - ifUnhtml(getPublisher(url, getInput()))); - story.getMeta().setUuid(getUuid(url, getInput())); - story.getMeta().setLuid(getLuid(url, getInput())); - story.getMeta().setLang(getLang(url, getInput())); - story.getMeta().setSubject(ifUnhtml(getSubject(url, getInput()))); - story.getMeta().setImageDocument(isImageDocument(url, getInput())); + MetaData meta = getMeta(url, getInput()); + story.setMeta(meta); + + if (meta != null && meta.getCover() == null) { + meta.setCover(getDefaultCover(meta.getSubject())); + } if (getDesc) { String descChapterName = Instance.getTrans().getString( @@ -443,31 +346,6 @@ public abstract class BasicSupport { story.setChapters(new ArrayList()); - URL cover = getCover(url, getInput()); - if (cover == null) { - String subject = story.getMeta() == null ? null : story - .getMeta().getSubject(); - if (subject != null && !subject.isEmpty() - && Instance.getCoverDir() != null) { - File fileCover = new File(Instance.getCoverDir(), subject); - cover = getImage(fileCover.toURI().toURL(), subject); - } - } - - if (cover != null) { - InputStream coverIn = null; - try { - coverIn = Instance.getCache().open(cover, this, true); - story.getMeta().setCover(StringUtils.toImage(coverIn)); - } catch (IOException e) { - Instance.syserr(new IOException(Instance.getTrans() - .getString(StringId.ERR_BS_NO_COVER, cover), e)); - } finally { - if (coverIn != null) - coverIn.close(); - } - } - List> chapters = getChapters(url, getInput()); int i = 1; if (chapters != null) { @@ -547,191 +425,17 @@ public abstract class BasicSupport { } /** - * Return the story publisher (by default, - * {@link BasicSupport#getSourceName()}). - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the publisher - * - * @throws IOException - * in case of I/O error - */ - protected String getPublisher(URL source, InputStream in) - throws IOException { - return getSourceName(); - } - - /** - * Return the story UUID, a unique value representing the story (it is often - * an URL). - *

- * By default, this is the {@link URL} of the resource. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the uuid - * - * @throws IOException - * in case of I/O error - */ - protected String getUuid(URL source, InputStream in) throws IOException { - return source.toString(); - } - - /** - * Return the story Library UID, a unique value representing the story (it - * is often a number) in the local library. - *

- * By default, this is empty. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the id - * - * @throws IOException - * in case of I/O error - */ - protected String getLuid(URL source, InputStream in) throws IOException { - return ""; - } - - /** - * Return the 2-letter language code of this story. - *

- * By default, this is 'EN'. - * - * @param source - * the source of the story - * @param in - * the input (the main resource) - * - * @return the language - * - * @throws IOException - * in case of I/O error - */ - protected String getLang(URL source, InputStream in) throws IOException { - return "EN"; - } - - /** - * Return the list of tags for this story. + * Prepare the support if needed before processing. * * @param source * the source of the story * @param in * the input (the main resource) * - * @return the tags - * - * @throws IOException - * in case of I/O error - */ - protected List getTags(URL source, InputStream in) - throws IOException { - return new ArrayList(); - } - - /** - * Return the first line from the given input which correspond to the given - * selectors. - *

- * Do not reset the input, which will be pointing at the line just after the - * result (input will be spent if no result is found). - * - * @param in - * the input - * @param needle - * a string that must be found inside the target line (also - * supports "^" at start to say "only if it starts with" the - * needle) - * @param relativeLine - * the line to return based upon the target line position (-1 = - * the line before, 0 = the target line...) - * - * @return the line - */ - protected String getLine(InputStream in, String needle, int relativeLine) { - return getLine(in, needle, relativeLine, true); - } - - /** - * Return a line from the given input which correspond to the given - * selectors. - *

- * Do not reset the input, which will be pointing at the line just after the - * result (input will be spent if no result is found) when first is TRUE, - * and will always be spent if first is FALSE. - * - * @param in - * the input - * @param needle - * a string that must be found inside the target line (also - * supports "^" at start to say "only if it starts with" the - * needle) - * @param relativeLine - * the line to return based upon the target line position (-1 = - * the line before, 0 = the target line...) - * @param first - * takes the first result (as opposed to the last one, which will - * also always spend the input) - * - * @return the line - */ - protected String getLine(InputStream in, String needle, int relativeLine, - boolean first) { - String rep = null; - - List lines = new ArrayList(); - @SuppressWarnings("resource") - Scanner scan = new Scanner(in, "UTF-8"); - int index = -1; - scan.useDelimiter("\\n"); - while (scan.hasNext()) { - lines.add(scan.next()); - - if (index == -1) { - if (needle.startsWith("^")) { - if (lines.get(lines.size() - 1).startsWith( - needle.substring(1))) { - index = lines.size() - 1; - } - - } else { - if (lines.get(lines.size() - 1).contains(needle)) { - index = lines.size() - 1; - } - } - } - - if (index >= 0 && index + relativeLine < lines.size()) { - rep = lines.get(index + relativeLine); - if (first) { - break; - } - } - } - - return rep; - } - - /** - * Prepare the support if needed before processing. - * * @throws IOException * on I/O error */ - protected void preprocess(InputStream in) throws IOException { + protected void preprocess(URL source, InputStream in) throws IOException { } /** @@ -809,7 +513,7 @@ public abstract class BasicSupport { String line = scan.next().trim(); boolean image = false; if (line.startsWith("[") && line.endsWith("]")) { - URL url = getImage(source, + URL url = getImageUrl(source, line.substring(1, line.length() - 1).trim()); if (url != null) { paras.add(new Paragraph(url)); @@ -872,12 +576,25 @@ public abstract class BasicSupport { } } + static BufferedImage getDefaultCover(String subject) { + if (subject != null && !subject.isEmpty() + && Instance.getCoverDir() != null) { + try { + File fileCover = new File(Instance.getCoverDir(), subject); + return getImage(fileCover.toURI().toURL(), subject); + } catch (MalformedURLException e) { + } + } + + return null; + } + /** * Return the list of supported image extensions. * * @return the extensions */ - protected String[] getImageExt(boolean emptyAllowed) { + static String[] getImageExt(boolean emptyAllowed) { if (emptyAllowed) { return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" }; } else { @@ -885,6 +602,27 @@ public abstract class BasicSupport { } } + static BufferedImage getImage(URL source, String line) { + URL url = getImageUrl(source, line); + if (url != null) { + InputStream in = null; + try { + in = Instance.getCache().open(url, getSupport(url), true); + return ImageIO.read(in); + } catch (IOException e) { + } finally { + if (in != null) { + try { + in.close(); + } catch (IOException e) { + } + } + } + } + + return null; + } + /** * Check if the given resource can be a local image or a remote image, then * refresh the cache with it if it is. @@ -897,74 +635,84 @@ public abstract class BasicSupport { * @return the image URL if found, or NULL * */ - protected URL getImage(URL source, String line) { - String path = new File(source.getFile()).getParent(); + static URL getImageUrl(URL source, String line) { URL url = null; - // try for files - try { - String urlBase = new File(new File(path), line.trim()).toURI() - .toURL().toString(); - for (String ext : getImageExt(true)) { - if (new File(urlBase + ext).exists()) { - url = new File(urlBase + ext).toURI().toURL(); - } - } - } catch (Exception e) { - // Nothing to do here - } - - if (url == null) { - // try for URLs - try { - for (String ext : getImageExt(true)) { - if (Instance.getCache().check(new URL(line + ext))) { - url = new URL(line + ext); + if (line != null) { + // try for files + String path = null; + if (source != null) { + path = new File(source.getFile()).getParent(); + try { + String urlBase = new File(new File(path), line.trim()) + .toURI().toURL().toString(); + for (String ext : getImageExt(true)) { + if (new File(urlBase + ext).exists()) { + url = new File(urlBase + ext).toURI().toURL(); + } } + } catch (Exception e) { + // Nothing to do here } + } - // try out of cache - if (url == null) { + if (url == null) { + // try for URLs + try { for (String ext : getImageExt(true)) { - try { + if (Instance.getCache().check(new URL(line + ext))) { url = new URL(line + ext); - Instance.getCache().refresh(url, this, true); - break; - } catch (IOException e) { - // no image with this ext - url = null; } } + + // try out of cache + if (url == null) { + for (String ext : getImageExt(true)) { + try { + url = new URL(line + ext); + Instance.getCache().refresh(url, + getSupport(url), true); + break; + } catch (IOException e) { + // no image with this ext + url = null; + } + } + } + } catch (MalformedURLException e) { + // Not an url } - } catch (MalformedURLException e) { - // Not an url } - } - // refresh the cached file - if (url != null) { - try { - Instance.getCache().refresh(url, this, true); - } catch (IOException e) { - // woops, broken image - url = null; + // refresh the cached file + if (url != null) { + try { + Instance.getCache().refresh(url, getSupport(url), true); + } catch (IOException e) { + // woops, broken image + url = null; + } } } return url; } + protected InputStream reset(InputStream in) { + try { + in.reset(); + } catch (IOException e) { + } + return in; + } + /** * Reset then return {@link BasicSupport#in}. * * @return {@link BasicSupport#in} - * - * @throws IOException - * in case of I/O error */ - protected InputStream getInput() throws IOException { - in.reset(); - return in; + protected InputStream getInput() { + return reset(in); } /** @@ -975,7 +723,7 @@ public abstract class BasicSupport { * * @return the author without prefixes */ - private String fixAuthor(String author) { + protected String fixAuthor(String author) { if (author != null) { for (String suffix : new String[] { " ", ":" }) { for (String byString : Instance.getConfig() @@ -1009,7 +757,8 @@ public abstract class BasicSupport { private List requotify(Paragraph para) { List newParas = new ArrayList(); - if (para.getType() == ParagraphType.QUOTE) { + if (para.getType() == ParagraphType.QUOTE + && para.getContent().length() > 2) { String line = para.getContent(); boolean singleQ = line.startsWith("" + openQuote); boolean doubleQ = line.startsWith("" + openDoubleQuote); @@ -1019,7 +768,7 @@ public abstract class BasicSupport { newParas.add(new Paragraph(ParagraphType.QUOTE, line)); } else { char close = singleQ ? closeQuote : closeDoubleQuote; - int posClose = line.indexOf(close); + int posClose = line.indexOf(close, 1); int posDot = line.indexOf("."); while (posDot >= 0 && posDot < posClose) { posDot = line.indexOf(".", posDot + 1); @@ -1029,7 +778,9 @@ public abstract class BasicSupport { String rest = line.substring(posDot + 1).trim(); line = line.substring(0, posDot + 1).trim(); newParas.add(new Paragraph(ParagraphType.QUOTE, line)); - newParas.addAll(requotify(processPara(rest))); + if (!rest.isEmpty()) { + newParas.addAll(requotify(processPara(rest))); + } } else { newParas.add(para); } @@ -1301,4 +1052,86 @@ public abstract class BasicSupport { return null; } + + /** + * Return the first line from the given input which correspond to the given + * selectors. + * + * @param in + * the input + * @param needle + * a string that must be found inside the target line (also + * supports "^" at start to say "only if it starts with" the + * needle) + * @param relativeLine + * the line to return based upon the target line position (-1 = + * the line before, 0 = the target line...) + * + * @return the line + */ + static String getLine(InputStream in, String needle, int relativeLine) { + return getLine(in, needle, relativeLine, true); + } + + /** + * Return a line from the given input which correspond to the given + * selectors. + * + * @param in + * the input + * @param needle + * a string that must be found inside the target line (also + * supports "^" at start to say "only if it starts with" the + * needle) + * @param relativeLine + * the line to return based upon the target line position (-1 = + * the line before, 0 = the target line...) + * @param first + * takes the first result (as opposed to the last one, which will + * also always spend the input) + * + * @return the line + */ + static String getLine(InputStream in, String needle, int relativeLine, + boolean first) { + String rep = null; + + try { + in.reset(); + } catch (IOException e) { + Instance.syserr(e); + } + + List lines = new ArrayList(); + @SuppressWarnings("resource") + Scanner scan = new Scanner(in, "UTF-8"); + int index = -1; + scan.useDelimiter("\\n"); + while (scan.hasNext()) { + lines.add(scan.next()); + + if (index == -1) { + if (needle.startsWith("^")) { + if (lines.get(lines.size() - 1).startsWith( + needle.substring(1))) { + index = lines.size() - 1; + } + + } else { + if (lines.get(lines.size() - 1).contains(needle)) { + index = lines.size() - 1; + } + } + } + + if (index >= 0 && index + relativeLine < lines.size()) { + rep = lines.get(index + relativeLine); + if (first) { + break; + } + } + } + + return rep; + } } diff --git a/src/be/nikiroo/fanfix/supported/Cbz.java b/src/be/nikiroo/fanfix/supported/Cbz.java index 012c047..f9eee08 100644 --- a/src/be/nikiroo/fanfix/supported/Cbz.java +++ b/src/be/nikiroo/fanfix/supported/Cbz.java @@ -39,18 +39,18 @@ class Cbz extends Epub { protected boolean requireInfo() { return false; } - - @Override - public boolean isImageDocument(URL source, InputStream in) - throws IOException { - return true; - } @Override protected boolean getCover() { return false; } + @Override + protected void preprocess(URL source, InputStream in) throws IOException { + super.preprocess(source, in); + meta.setImageDocument(true); + } + @Override public Story process(URL url) throws IOException { Story story = processMeta(url, false, true); @@ -74,9 +74,7 @@ class Cbz extends Epub { if (imageEntry) { try { - // we assume that we can get the UUID without a stream - String uuid = getUuid(url, null) + "_" - + entry.getName(); + String uuid = meta.getUuid() + "_" + entry.getName(); Instance.getCache().addToCache(zipIn, uuid); chap.getParagraphs().add( diff --git a/src/be/nikiroo/fanfix/supported/E621.java b/src/be/nikiroo/fanfix/supported/E621.java index 2455c87..bc6ba5b 100644 --- a/src/be/nikiroo/fanfix/supported/E621.java +++ b/src/be/nikiroo/fanfix/supported/E621.java @@ -10,6 +10,7 @@ import java.util.Scanner; import be.nikiroo.fanfix.Instance; import be.nikiroo.fanfix.data.Chapter; +import be.nikiroo.fanfix.data.MetaData; import be.nikiroo.fanfix.data.Story; import be.nikiroo.utils.StringUtils; @@ -30,8 +31,24 @@ class E621 extends BasicSupport { } @Override - public boolean isImageDocument(URL source, InputStream in) { - return true; + protected MetaData getMeta(URL source, InputStream in) throws IOException { + MetaData meta = new MetaData(); + + meta.setTitle(getTitle(reset(in))); + meta.setAuthor(getAuthor(source, reset(in))); + meta.setDate(""); + meta.setTags(new ArrayList()); // TODDO ??? + meta.setSource(getSourceName()); + meta.setPublisher(getSourceName()); + meta.setUuid(source.toString()); + meta.setLuid(""); + meta.setLang("EN"); + meta.setSubject(""); + meta.setType(getType().toString()); + meta.setImageDocument(true); + meta.setCover(null); + + return meta; } @Override @@ -66,8 +83,7 @@ class E621 extends BasicSupport { return true; } - @Override - protected String getAuthor(URL source, InputStream in) throws IOException { + private String getAuthor(URL source, InputStream in) throws IOException { String author = getLine(in, "href=\"/post/show/", 0); if (author != null) { String key = "href=\""; @@ -105,23 +121,7 @@ class E621 extends BasicSupport { return null; } - @Override - protected String getDate(URL source, InputStream in) throws IOException { - return null; - } - - @Override - protected String getSubject(URL source, InputStream in) throws IOException { - return null; - } - - @Override - protected URL getCover(URL source, InputStream in) throws IOException { - return null; - } - - @Override - protected String getTitle(URL source, InputStream in) throws IOException { + private String getTitle(InputStream in) throws IOException { String title = getLine(in, "", 0); if (title != null) { int pos = title.indexOf('>'); @@ -137,7 +137,7 @@ class E621 extends BasicSupport { title = title.substring("Pool:".length()); } - title = title.trim(); + title = StringUtils.unhtml(title).trim(); } return title; diff --git a/src/be/nikiroo/fanfix/supported/Epub.java b/src/be/nikiroo/fanfix/supported/Epub.java index 82ebb2b..32349a8 100644 --- a/src/be/nikiroo/fanfix/supported/Epub.java +++ b/src/be/nikiroo/fanfix/supported/Epub.java @@ -6,6 +6,7 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.net.URL; +import java.util.ArrayList; import java.util.List; import java.util.Map.Entry; import java.util.zip.ZipEntry; @@ -14,7 +15,7 @@ import java.util.zip.ZipInputStream; import javax.imageio.ImageIO; import be.nikiroo.fanfix.Instance; -import be.nikiroo.fanfix.bundles.Config; +import be.nikiroo.fanfix.data.MetaData; import be.nikiroo.utils.IOUtils; import be.nikiroo.utils.MarkableFileInputStream; @@ -24,16 +25,12 @@ import be.nikiroo.utils.MarkableFileInputStream; * * @author niki */ -class Epub extends BasicSupport { - private InfoText base; - private URL fakeSource; - - private File tmpCover; - private File tmpInfo; +class Epub extends InfoText { private File tmp; + protected MetaData meta; - /** Only used by {@link Epub#getInput()} so it is always reset. */ - private InputStream in; + private URL fakeSource; + private InputStream fakeIn; @Override public String getSourceName() { @@ -50,63 +47,15 @@ class Epub extends BasicSupport { } @Override - protected boolean isHtml() { - if (tmpInfo.exists()) { - return base.isHtml(); - } - - return false; - } - - @Override - protected String getTitle(URL source, InputStream in) throws IOException { - if (tmpInfo.exists()) { - return base.getTitle(fakeSource, getFakeInput()); - } - - return source.toString(); - } - - @Override - protected String getAuthor(URL source, InputStream in) throws IOException { - if (tmpInfo.exists()) { - return base.getAuthor(fakeSource, getFakeInput()); - } - - return null; - } - - @Override - protected String getDate(URL source, InputStream in) throws IOException { - if (tmpInfo.exists()) { - return base.getDate(fakeSource, getFakeInput()); - } - - return null; - } - - @Override - protected String getSubject(URL source, InputStream in) throws IOException { - if (tmpInfo.exists()) { - return base.getSubject(fakeSource, getFakeInput()); - } - - return null; + protected MetaData getMeta(URL source, InputStream in) throws IOException { + return meta; } @Override protected String getDesc(URL source, InputStream in) throws IOException { - if (tmpInfo.exists()) { - return base.getDesc(fakeSource, getFakeInput()); - } - - return null; - } - - @Override - protected URL getCover(URL source, InputStream in) throws IOException { - if (tmpCover.exists()) { - return tmpCover.toURI().toURL(); + if (fakeIn != null) { + fakeIn.reset(); + return super.getDesc(fakeSource, fakeIn); } return null; @@ -115,8 +64,9 @@ class Epub extends BasicSupport { @Override protected List<Entry<String, URL>> getChapters(URL source, InputStream in) throws IOException { - if (tmpInfo.exists()) { - return base.getChapters(fakeSource, getFakeInput()); + if (fakeIn != null) { + fakeIn.reset(); + return super.getChapters(fakeSource, fakeIn); } return null; @@ -125,70 +75,22 @@ class Epub extends BasicSupport { @Override protected String getChapterContent(URL source, InputStream in, int number) throws IOException { - if (tmpInfo.exists()) { - return base.getChapterContent(fakeSource, getFakeInput(), number); + if (fakeIn != null) { + fakeIn.reset(); + return super.getChapterContent(fakeSource, fakeIn, number); } return null; } @Override - protected String getLang(URL source, InputStream in) throws IOException { - if (tmpInfo.exists()) { - return base.getLang(fakeSource, getFakeInput()); - } - - return super.getLang(source, in); - } - - @Override - protected String getPublisher(URL source, InputStream in) - throws IOException { - if (tmpInfo.exists()) { - return base.getPublisher(fakeSource, getFakeInput()); - } - - return super.getPublisher(source, in); - } - - @Override - protected List<String> getTags(URL source, InputStream in) - throws IOException { - if (tmpInfo.exists()) { - return base.getTags(fakeSource, getFakeInput()); - } - - return super.getTags(source, in); - } - - @Override - protected String getUuid(URL source, InputStream in) throws IOException { - if (tmpInfo.exists()) { - return base.getUuid(fakeSource, getFakeInput()); - } - - return super.getUuid(source, in); - } - - @Override - protected String getLuid(URL source, InputStream in) throws IOException { - if (tmpInfo.exists()) { - return base.getLuid(fakeSource, getFakeInput()); - } - - return super.getLuid(source, in); - } - - @Override - protected void preprocess(InputStream in) throws IOException { + protected void preprocess(URL source, InputStream in) throws IOException { // Note: do NOT close this stream, as it would also close "in" ZipInputStream zipIn = new ZipInputStream(in); tmp = File.createTempFile("fanfic-reader-parser_", ".tmp"); - tmpInfo = new File(tmp + ".info"); - tmpCover = File.createTempFile("fanfic-reader-parser_", ".tmp"); - - base = new InfoText(); + File tmpInfo = new File(tmp + ".info"); fakeSource = tmp.toURI().toURL(); + BufferedImage cover = null; for (ZipEntry entry = zipIn.getNextEntry(); entry != null; entry = zipIn .getNextEntry()) { @@ -213,10 +115,7 @@ class Epub extends BasicSupport { // Cover if (getCover()) { try { - BufferedImage image = ImageIO.read(zipIn); - ImageIO.write(image, Instance.getConfig() - .getString(Config.IMAGE_FORMAT_COVER) - .toLowerCase(), tmpCover); + cover = ImageIO.read(zipIn); } catch (Exception e) { Instance.syserr(e); } @@ -238,33 +137,36 @@ class Epub extends BasicSupport { } if (tmp.exists()) { - this.in = new MarkableFileInputStream(new FileInputStream(tmp)); + this.fakeIn = new MarkableFileInputStream(new FileInputStream(tmp)); + } + + if (tmpInfo.exists()) { + meta = InfoReader.readMeta(tmpInfo); + if (cover != null) { + meta.setCover(cover); + } + tmpInfo.delete(); + } else { + meta = new MetaData(); + meta.setUuid(source.toString()); + meta.setLang("EN"); + meta.setTags(new ArrayList<String>()); + meta.setSource(getSourceName()); } } @Override protected void close() throws IOException { - for (File file : new File[] { tmp, tmpInfo, tmpCover }) { - if (file != null && file.exists()) { - if (!file.delete()) { - file.deleteOnExit(); - } + if (tmp != null && tmp.exists()) { + if (!tmp.delete()) { + tmp.deleteOnExit(); } } tmp = null; - tmpInfo = null; - tmpCover = null; - fakeSource = null; - try { - if (in != null) { - in.close(); - } - } finally { - in = null; - base.close(); - } + fakeIn.close(); + super.close(); } protected String getDataPrefix() { @@ -278,17 +180,4 @@ class Epub extends BasicSupport { protected boolean getCover() { return true; } - - /** - * Reset then return {@link Epub#in}. - * - * @return {@link Epub#in} - * - * @throws IOException - * in case of I/O error - */ - private InputStream getFakeInput() throws IOException { - in.reset(); - return in; - } } diff --git a/src/be/nikiroo/fanfix/supported/Fanfiction.java b/src/be/nikiroo/fanfix/supported/Fanfiction.java index cbbc085..9be098d 100644 --- a/src/be/nikiroo/fanfix/supported/Fanfiction.java +++ b/src/be/nikiroo/fanfix/supported/Fanfiction.java @@ -1,5 +1,6 @@ package be.nikiroo.fanfix.supported; +import java.awt.image.BufferedImage; import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; @@ -12,6 +13,7 @@ import java.util.Map.Entry; import java.util.Scanner; import be.nikiroo.fanfix.Instance; +import be.nikiroo.fanfix.data.MetaData; import be.nikiroo.utils.StringUtils; /** @@ -33,7 +35,27 @@ class Fanfiction extends BasicSupport { } @Override - protected String getSubject(URL source, InputStream in) { + protected MetaData getMeta(URL source, InputStream in) throws IOException { + MetaData meta = new MetaData(); + + meta.setTitle(getTitle(reset(in))); + meta.setAuthor(getAuthor(reset(in))); + meta.setDate(getDate(reset(in))); + meta.setTags(getTags(reset(in))); + meta.setSource(getSourceName()); + meta.setPublisher(getSourceName()); + meta.setUuid(source.toString()); + meta.setLuid(""); + meta.setLang("EN"); + meta.setSubject(getSubject(reset(in))); + meta.setType(getType().toString()); + meta.setImageDocument(false); + meta.setCover(getCover(source, reset(in))); + + return meta; + } + + private String getSubject(InputStream in) { String line = getLine(in, "id=pre_story_links", 0); if (line != null) { int pos = line.lastIndexOf('"'); @@ -41,7 +63,7 @@ class Fanfiction extends BasicSupport { line = line.substring(pos + 1); pos = line.indexOf('<'); if (pos >= 0) { - return line.substring(0, pos); + return StringUtils.unhtml(line.substring(0, pos)).trim(); } } } @@ -49,10 +71,8 @@ class Fanfiction extends BasicSupport { return null; } - @Override - protected List<String> getTags(URL source, InputStream in) - throws IOException { - List<String> tags = super.getTags(source, in); + private List<String> getTags(InputStream in) throws IOException { + List<String> tags = new ArrayList<String>(); String key = "title=\"Send Private Message\""; String line = getLine(in, key, 2); @@ -71,7 +91,7 @@ class Fanfiction extends BasicSupport { } for (String tag : line.split("-")) { - tags.add(tag.trim()); + tags.add(StringUtils.unhtml(tag).trim()); } } } @@ -80,8 +100,7 @@ class Fanfiction extends BasicSupport { return tags; } - @Override - protected String getTitle(URL source, InputStream in) { + private String getTitle(InputStream in) { int i = 0; @SuppressWarnings("resource") Scanner scan = new Scanner(in, "UTF-8"); @@ -95,7 +114,7 @@ class Fanfiction extends BasicSupport { line = line.substring("Follow/Fav".length()).trim(); } - return line; + return StringUtils.unhtml(line).trim(); } } } @@ -103,8 +122,7 @@ class Fanfiction extends BasicSupport { return null; } - @Override - protected String getAuthor(URL source, InputStream in) { + private String getAuthor(InputStream in) { int i = 0; @SuppressWarnings("resource") Scanner scan = new Scanner(in, "UTF-8"); @@ -121,8 +139,7 @@ class Fanfiction extends BasicSupport { return null; } - @Override - protected String getDate(URL source, InputStream in) { + private String getDate(InputStream in) { String key = "Published: <span data-xutime='"; String line = getLine(in, key, 0); if (line != null) { @@ -153,8 +170,7 @@ class Fanfiction extends BasicSupport { return getLine(in, "title=\"Send Private Message\"", 1); } - @Override - protected URL getCover(URL url, InputStream in) { + private BufferedImage getCover(URL url, InputStream in) { String key = "class='cimage"; String line = getLine(in, key, 0); if (line != null) { @@ -179,11 +195,7 @@ class Fanfiction extends BasicSupport { + "/" + url.getPath() + "/" + line; } - try { - return new URL(line); - } catch (MalformedURLException e) { - Instance.syserr(e); - } + return getImage(null, line); } } } diff --git a/src/be/nikiroo/fanfix/supported/Fimfiction.java b/src/be/nikiroo/fanfix/supported/Fimfiction.java index 61f61d2..a6bd475 100644 --- a/src/be/nikiroo/fanfix/supported/Fimfiction.java +++ b/src/be/nikiroo/fanfix/supported/Fimfiction.java @@ -1,5 +1,7 @@ package be.nikiroo.fanfix.supported; +import java.awt.image.BufferedImage; +import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; @@ -11,6 +13,8 @@ import java.util.Map.Entry; import java.util.Scanner; import be.nikiroo.fanfix.Instance; +import be.nikiroo.fanfix.data.MetaData; +import be.nikiroo.utils.StringUtils; /** * Support class for <a href="http://www.fimfiction.net/">FimFiction.net</a> @@ -30,8 +34,24 @@ class Fimfiction extends BasicSupport { } @Override - protected String getSubject(URL source, InputStream in) { - return "MLP"; + protected MetaData getMeta(URL source, InputStream in) throws IOException { + MetaData meta = new MetaData(); + + meta.setTitle(getTitle(reset(in))); + meta.setAuthor(getAuthor(reset(in))); + meta.setDate(getDate(reset(in))); + meta.setTags(getTags(reset(in))); + meta.setSource(getSourceName()); + meta.setPublisher(getSourceName()); + meta.setUuid(source.toString()); + meta.setLuid(""); + meta.setLang("EN"); + meta.setSubject("MLP"); + meta.setType(getType().toString()); + meta.setImageDocument(false); + meta.setCover(getCover(reset(in))); + + return meta; } @Override @@ -41,8 +61,7 @@ class Fimfiction extends BasicSupport { return cookies; } - @Override - protected List<String> getTags(URL source, InputStream in) { + private List<String> getTags(InputStream in) { List<String> tags = new ArrayList<String>(); tags.add("MLP"); @@ -71,8 +90,7 @@ class Fimfiction extends BasicSupport { return tags; } - @Override - protected String getTitle(URL source, InputStream in) { + private String getTitle(InputStream in) { String line = getLine(in, " property=\"og:title\"", 0); if (line != null) { int pos = -1; @@ -84,7 +102,7 @@ class Fimfiction extends BasicSupport { line = line.substring(pos + 1); pos = line.indexOf('"'); if (pos >= 0) { - return line.substring(0, pos); + return StringUtils.unhtml(line.substring(0, pos)).trim(); } } } @@ -92,8 +110,7 @@ class Fimfiction extends BasicSupport { return null; } - @Override - protected String getAuthor(URL source, InputStream in) { + private String getAuthor(InputStream in) { String line = getLine(in, " href=\"/user/", 0); if (line != null) { int pos = line.indexOf('"'); @@ -114,8 +131,7 @@ class Fimfiction extends BasicSupport { return null; } - @Override - protected String getDate(URL source, InputStream in) { + private String getDate(InputStream in) { String line = getLine(in, "<span class=\"date\">", 0); if (line != null) { int pos = -1; @@ -141,8 +157,7 @@ class Fimfiction extends BasicSupport { return getLine(in, "class=\"more_button hidden\"", -1); } - @Override - protected URL getCover(URL url, InputStream in) { + private BufferedImage getCover(InputStream in) { // Note: the 'og:image' is the SMALL cover, not the full version String cover = getLine(in, "<div class=\"story_image\">", 1); if (cover != null) { @@ -156,15 +171,7 @@ class Fimfiction extends BasicSupport { } } - if (cover != null) { - try { - return new URL(cover); - } catch (MalformedURLException e) { - Instance.syserr(e); - } - } - - return null; + return getImage(null, cover); } @Override diff --git a/src/be/nikiroo/fanfix/supported/InfoReader.java b/src/be/nikiroo/fanfix/supported/InfoReader.java new file mode 100644 index 0000000..1695ebd --- /dev/null +++ b/src/be/nikiroo/fanfix/supported/InfoReader.java @@ -0,0 +1,123 @@ +package be.nikiroo.fanfix.supported; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; + +import be.nikiroo.fanfix.data.MetaData; +import be.nikiroo.utils.MarkableFileInputStream; + +// not complete: no "description" tag +public class InfoReader { + public static MetaData readMeta(File infoFile) throws IOException { + if (infoFile == null) { + throw new IOException("File is null"); + } + + if (infoFile.exists()) { + InputStream in = new MarkableFileInputStream(new FileInputStream( + infoFile)); + try { + return createMeta(in); + } finally { + in.close(); + in = null; + } + } else { + throw new FileNotFoundException( + "File given as argument does not exists: " + + infoFile.getAbsolutePath()); + } + } + + private static MetaData createMeta(InputStream in) throws IOException { + MetaData meta = new MetaData(); + + meta.setTitle(getInfoTag(in, "TITLE")); + meta.setAuthor(getInfoTag(in, "AUTHOR")); + meta.setDate(getInfoTag(in, "DATE")); + meta.setTags(getInfoTagList(in, "TAGS", ",")); + meta.setSource(getInfoTag(in, "SOURCE")); + meta.setPublisher(getInfoTag(in, "PUBLISHER")); + meta.setUuid(getInfoTag(in, "UUID")); + meta.setLuid(getInfoTag(in, "LUID")); + meta.setLang(getInfoTag(in, "LANG")); + meta.setSubject(getInfoTag(in, "SUBJECT")); + meta.setType(getInfoTag(in, "TYPE")); + meta.setImageDocument(getInfoTagBoolean(in, "IMAGES_DOCUMENT", false)); + meta.setCover(BasicSupport.getImage(null, getInfoTag(in, "COVER"))); + + if (meta.getCover() == null) { + meta.setCover(BasicSupport.getDefaultCover(meta.getSubject())); + } + + return meta; + } + + private static boolean getInfoTagBoolean(InputStream in, String key, + boolean def) throws IOException { + Boolean value = getInfoTagBoolean(in, key); + return value == null ? def : value; + } + + private static Boolean getInfoTagBoolean(InputStream in, String key) + throws IOException { + String value = getInfoTag(in, key); + if (value != null && !value.trim().isEmpty()) { + value = value.toLowerCase().trim(); + return value.equals("1") || value.equals("on") + || value.equals("true") || value.equals("yes"); + } + + return null; + } + + private static List<String> getInfoTagList(InputStream in, String key, + String separator) throws IOException { + List<String> list = new ArrayList<String>(); + String tt = getInfoTag(in, key); + if (tt != null) { + for (String tag : tt.split(separator)) { + list.add(tag.trim()); + } + } + + return list; + } + + /** + * Return the value of the given tag in the <tt>.info</tt> file if present. + * + * @param key + * the tag key + * + * @return the value or NULL + * + * @throws IOException + * in case of I/O error + */ + private static String getInfoTag(InputStream in, String key) + throws IOException { + key = "^" + key + "="; + + if (in != null) { + in.reset(); + String value = BasicSupport.getLine(in, key, 0); + if (value != null && !value.isEmpty()) { + value = value.trim().substring(key.length() - 1).trim(); + if (value.startsWith("'") && value.endsWith("'") + || value.startsWith("\"") && value.endsWith("\"")) { + value = value.substring(1, value.length() - 1).trim(); + } + + return value; + } + } + + return null; + } +} diff --git a/src/be/nikiroo/fanfix/supported/InfoText.java b/src/be/nikiroo/fanfix/supported/InfoText.java index 365c51a..4f1dc39 100644 --- a/src/be/nikiroo/fanfix/supported/InfoText.java +++ b/src/be/nikiroo/fanfix/supported/InfoText.java @@ -1,15 +1,13 @@ package be.nikiroo.fanfix.supported; import java.io.File; -import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; -import java.net.MalformedURLException; import java.net.URISyntaxException; import java.net.URL; -import java.util.List; import be.nikiroo.fanfix.Instance; +import be.nikiroo.fanfix.data.MetaData; /** * Support class for <tt>.info</tt> text files ({@link Text} files with a @@ -27,160 +25,33 @@ class InfoText extends Text { } @Override - protected String getTitle(URL source, InputStream in) throws IOException { - String tag = getInfoTag(source, "TITLE"); - if (tag != null) { - return tag; - } - - return super.getTitle(source, in); - } - - @Override - protected String getAuthor(URL source, InputStream in) throws IOException { - String tag = getInfoTag(source, "AUTHOR"); - if (tag != null) { - return tag; - } - - return super.getAuthor(source, in); - } - - @Override - protected String getDate(URL source, InputStream in) throws IOException { - String tag = getInfoTag(source, "DATE"); - if (tag != null) { - return tag; - } - - return super.getDate(source, in); - } - - @Override - protected String getSubject(URL source, InputStream in) throws IOException { - String tag = getInfoTag(source, "SUBJECT"); - if (tag != null) { - return tag; - } - - return super.getSubject(source, in); - } - - @Override - protected String getLang(URL source, InputStream in) throws IOException { - String tag = getInfoTag(source, "LANG"); - if (tag != null) { - return tag; - } - - return super.getLang(source, in); - } - - @Override - protected String getPublisher(URL source, InputStream in) - throws IOException { - String tag = getInfoTag(source, "PUBLISHER"); - if (tag != null) { - return tag; - } - - return super.getPublisher(source, in); - } - - @Override - protected String getUuid(URL source, InputStream in) throws IOException { - String tag = getInfoTag(source, "UUID"); - if (tag != null) { - return tag; - } - - return super.getUuid(source, in); - } - - @Override - protected String getLuid(URL source, InputStream in) throws IOException { - String tag = getInfoTag(source, "LUID"); - if (tag != null) { - return tag; - } - - return super.getLuid(source, in); - } - - @Override - protected List<String> getTags(URL source, InputStream in) - throws IOException { - List<String> tags = super.getTags(source, in); - - String tt = getInfoTag(source, "TAGS"); - if (tt != null) { - for (String tag : tt.split(",")) { - tags.add(tag.trim()); - } - } - - return tags; - } - - @Override - public boolean isImageDocument(URL source, InputStream in) - throws IOException { - String tag = getInfoTag(source, "IMAGES_DOCUMENT"); - if (tag != null) { - return tag.trim().toLowerCase().equals("true"); - } - - return super.isImageDocument(source, in); - } - - @Override - protected URL getCover(URL source, InputStream in) { - File file; + protected MetaData getMeta(URL source, InputStream in) throws IOException { try { - file = new File(source.toURI()); - file = new File(file.getPath() + ".info"); - } catch (URISyntaxException e) { - Instance.syserr(e); - file = null; - } - - String path = null; - if (file != null && file.exists()) { - try { - InputStream infoIn = new FileInputStream(file); - try { - String key = "COVER="; - String tt = getLine(infoIn, key, 0); - if (tt != null && !tt.isEmpty()) { - tt = tt.substring(key.length()).trim(); - if (tt.startsWith("'") && tt.endsWith("'")) { - tt = tt.substring(1, tt.length() - 1).trim(); - } - - URL cover = getImage(source, tt); - if (cover != null) { - path = cover.getFile(); - } - } - } finally { - infoIn.close(); + MetaData meta = InfoReader.readMeta(new File(new File(source + .toURI()).getPath() + ".info")); + + // Some old .info files don't have this information... + String test = meta.getTitle() == null ? "" : meta.getTitle(); + test += meta.getAuthor() == null ? "" : meta.getAuthor(); + test += meta.getDate() == null ? "" : meta.getDate(); + if (test.isEmpty()) { + MetaData superMeta = super.getMeta(source, reset(in)); + if (meta.getTitle() == null || meta.getTitle().isEmpty()) { + meta.setTitle(superMeta.getTitle()); + } + if (meta.getAuthor() == null || meta.getAuthor().isEmpty()) { + meta.setAuthor(superMeta.getAuthor()); + } + if (meta.getDate() == null || meta.getDate().isEmpty()) { + meta.setDate(superMeta.getDate()); } - } catch (MalformedURLException e) { - Instance.syserr(e); - } catch (IOException e) { - Instance.syserr(e); } - } - if (path != null) { - try { - return new File(path).toURI().toURL(); - } catch (MalformedURLException e) { - Instance.syserr(e); - } - } + return meta; - return null; + } catch (URISyntaxException e) { + throw new IOException("Cannot parse URL to file: " + source, e); + } } @Override @@ -200,49 +71,4 @@ class InfoText extends Text { return false; } - - /** - * Return the value of the given tag in the <tt>.info</tt> file if present. - * - * @param source - * the source story {@link URL} - * @param key - * the tag key - * - * @return the value or NULL - * - * @throws IOException - * in case of I/O error - */ - private String getInfoTag(URL source, String key) throws IOException { - key = "^" + key + "="; - - File file; - try { - file = new File(source.toURI()); - file = new File(file.getPath() + ".info"); - } catch (URISyntaxException e) { - throw new IOException(e); - } - - if (file.exists()) { - InputStream infoIn = new FileInputStream(file); - try { - String value = getLine(infoIn, key, 0); - if (value != null && !value.isEmpty()) { - value = value.trim().substring(key.length() - 1).trim(); - if (value.startsWith("'") && value.endsWith("'") - || value.startsWith("\"") && value.endsWith("\"")) { - value = value.substring(1, value.length() - 1).trim(); - } - - return value; - } - } finally { - infoIn.close(); - } - } - - return null; - } } diff --git a/src/be/nikiroo/fanfix/supported/MangaFox.java b/src/be/nikiroo/fanfix/supported/MangaFox.java index 2c4c2f3..3e2dbad 100644 --- a/src/be/nikiroo/fanfix/supported/MangaFox.java +++ b/src/be/nikiroo/fanfix/supported/MangaFox.java @@ -1,5 +1,6 @@ package be.nikiroo.fanfix.supported; +import java.awt.image.BufferedImage; import java.io.IOException; import java.io.InputStream; import java.net.MalformedURLException; @@ -10,7 +11,10 @@ import java.util.List; import java.util.Map.Entry; import java.util.Scanner; +import javax.imageio.ImageIO; + import be.nikiroo.fanfix.Instance; +import be.nikiroo.fanfix.data.MetaData; import be.nikiroo.utils.StringUtils; class MangaFox extends BasicSupport { @@ -25,18 +29,27 @@ class MangaFox extends BasicSupport { } @Override - protected String getSubject(URL source, InputStream in) { - return "manga"; + protected MetaData getMeta(URL source, InputStream in) throws IOException { + MetaData meta = new MetaData(); + + meta.setTitle(getTitle(reset(in))); + meta.setAuthor(getAuthor(reset(in))); + meta.setDate(getDate(reset(in))); + meta.setTags(getTags(reset(in))); + meta.setSource(getSourceName()); + meta.setPublisher(getSourceName()); + meta.setUuid(source.toString()); + meta.setLuid(""); + meta.setLang("EN"); + meta.setSubject("manga"); + meta.setType(getType().toString()); + meta.setImageDocument(true); + meta.setCover(getCover(reset(in))); + + return meta; } - @Override - public boolean isImageDocument(URL source, InputStream in) - throws IOException { - return true; - } - - @Override - protected List<String> getTags(URL source, InputStream in) { + private List<String> getTags(InputStream in) { List<String> tags = new ArrayList<String>(); String line = getLine(in, "/genres/", 0); @@ -53,8 +66,7 @@ class MangaFox extends BasicSupport { return tags; } - @Override - protected String getTitle(URL source, InputStream in) { + private String getTitle(InputStream in) { String line = getLine(in, " property=\"og:title\"", 0); if (line != null) { int pos = -1; @@ -74,8 +86,7 @@ class MangaFox extends BasicSupport { return null; } - @Override - protected String getAuthor(URL source, InputStream in) { + private String getAuthor(InputStream in) { List<String> authors = new ArrayList<String>(); String line = getLine(in, "/author/", 0, false); @@ -120,8 +131,7 @@ class MangaFox extends BasicSupport { } } - @Override - protected String getDate(URL source, InputStream in) { + private String getDate(InputStream in) { String line = getLine(in, "/released/", 0); if (line != null) { line = StringUtils.unhtml(line); @@ -152,8 +162,7 @@ class MangaFox extends BasicSupport { return null; } - @Override - protected URL getCover(URL url, InputStream in) { + private BufferedImage getCover(InputStream in) { String line = getLine(in, " property=\"og:image\"", 0); String cover = null; if (line != null) { @@ -172,10 +181,15 @@ class MangaFox extends BasicSupport { } if (cover != null) { + InputStream coverIn; try { - return new URL(cover); - } catch (MalformedURLException e) { - Instance.syserr(e); + coverIn = openEx(cover); + try { + return ImageIO.read(coverIn); + } finally { + coverIn.close(); + } + } catch (IOException e) { } } diff --git a/src/be/nikiroo/fanfix/supported/Text.java b/src/be/nikiroo/fanfix/supported/Text.java index f1ee71c..4bb86fe 100644 --- a/src/be/nikiroo/fanfix/supported/Text.java +++ b/src/be/nikiroo/fanfix/supported/Text.java @@ -1,5 +1,6 @@ package be.nikiroo.fanfix.supported; +import java.awt.image.BufferedImage; import java.io.File; import java.io.IOException; import java.io.InputStream; @@ -12,6 +13,7 @@ import java.util.Scanner; import be.nikiroo.fanfix.Instance; import be.nikiroo.fanfix.bundles.Config; +import be.nikiroo.fanfix.data.MetaData; /** * Support class for local stories encoded in textual format, with a few rules: @@ -42,13 +44,27 @@ class Text extends BasicSupport { } @Override - protected String getPublisher(URL source, InputStream in) - throws IOException { - return ""; + protected MetaData getMeta(URL source, InputStream in) throws IOException { + MetaData meta = new MetaData(); + + meta.setTitle(getTitle(reset(in))); + meta.setAuthor(getAuthor(reset(in))); + meta.setDate(getDate(reset(in))); + meta.setTags(new ArrayList<String>()); + meta.setSource(getSourceName()); + meta.setPublisher(""); // often sourceName + meta.setUuid(source.toString()); + meta.setLuid(""); + meta.setLang(getLang(source, reset(in))); // default is EN + meta.setSubject(getSubject(source)); + meta.setType(getType().toString()); + meta.setImageDocument(false); + meta.setCover(getCover(source)); + + return meta; } - @Override - protected String getSubject(URL source, InputStream in) throws IOException { + private String getSubject(URL source) throws IOException { try { File file = new File(source.toURI()); return file.getParentFile().getName(); @@ -59,8 +75,7 @@ class Text extends BasicSupport { } - @Override - protected String getLang(URL source, InputStream in) throws IOException { + private String getLang(URL source, InputStream in) throws IOException { @SuppressWarnings("resource") Scanner scan = new Scanner(in, "UTF-8"); scan.useDelimiter("\\n"); @@ -73,7 +88,7 @@ class Text extends BasicSupport { String lang = detectChapter(chapter0); if (lang == null) { - lang = super.getLang(source, in); + lang = "EN"; } else { lang = lang.toUpperCase(); } @@ -81,16 +96,14 @@ class Text extends BasicSupport { return lang; } - @Override - protected String getTitle(URL source, InputStream in) throws IOException { + private String getTitle(InputStream in) throws IOException { @SuppressWarnings("resource") Scanner scan = new Scanner(in, "UTF-8"); scan.useDelimiter("\\n"); return scan.next(); } - @Override - protected String getAuthor(URL source, InputStream in) throws IOException { + private String getAuthor(InputStream in) throws IOException { @SuppressWarnings("resource") Scanner scan = new Scanner(in, "UTF-8"); scan.useDelimiter("\\n"); @@ -103,11 +116,10 @@ class Text extends BasicSupport { author = authorDate.substring(0, pos); } - return author; + return fixAuthor(author); } - @Override - protected String getDate(URL source, InputStream in) throws IOException { + private String getDate(InputStream in) throws IOException { @SuppressWarnings("resource") Scanner scan = new Scanner(in, "UTF-8"); scan.useDelimiter("\\n"); @@ -128,12 +140,11 @@ class Text extends BasicSupport { } @Override - protected String getDesc(URL source, InputStream in) { + protected String getDesc(URL source, InputStream in) throws IOException { return getChapterContent(source, in, 0); } - @Override - protected URL getCover(URL source, InputStream in) { + private BufferedImage getCover(URL source) throws IOException { String path; try { path = new File(source.toURI()).getPath(); @@ -152,7 +163,8 @@ class Text extends BasicSupport { } @Override - protected List<Entry<String, URL>> getChapters(URL source, InputStream in) { + protected List<Entry<String, URL>> getChapters(URL source, InputStream in) + throws IOException { List<Entry<String, URL>> chaps = new ArrayList<Entry<String, URL>>(); @SuppressWarnings("resource") Scanner scan = new Scanner(in, "UTF-8"); @@ -195,29 +207,23 @@ class Text extends BasicSupport { } @Override - protected String getChapterContent(URL source, InputStream in, int number) { + protected String getChapterContent(URL source, InputStream in, int number) + throws IOException { StringBuilder builder = new StringBuilder(); @SuppressWarnings("resource") Scanner scan = new Scanner(in, "UTF-8"); scan.useDelimiter("\\n"); boolean inChap = false; - boolean prevLineEmpty = false; while (scan.hasNext()) { String line = scan.next(); - if (prevLineEmpty) { - if (detectChapter(line, number) != null) { - inChap = true; - } else if (inChap) { - if (prevLineEmpty && detectChapter(line) != null) { - break; - } - - builder.append(line); - builder.append("\n"); - } + if (detectChapter(line, number) != null) { + inChap = true; + } else if (inChap && detectChapter(line) != null) { + break; + } else if (inChap) { + builder.append(line); + builder.append("\n"); } - - prevLineEmpty = line.trim().isEmpty(); } return builder.toString(); -- 2.27.0