From: Niki Roo Date: Thu, 29 Mar 2018 06:56:26 +0000 (+0200) Subject: Merge branch WIP back into master: X-Git-Tag: fanfix-1.7.0~2 X-Git-Url: http://git.nikiroo.be/?p=fanfix.git;a=commitdiff_plain;h=6bfa27805ec903223e6e45c6f4d00e94629a064c;hp=a1a900a5f36565a6b944504a1c7c22c2915748a3 Merge branch WIP back into master: - use BasicSupport for all Supports working on files --- diff --git a/src/be/nikiroo/fanfix/output/Cbz.java b/src/be/nikiroo/fanfix/output/Cbz.java index 490ba8f..3d90082 100644 --- a/src/be/nikiroo/fanfix/output/Cbz.java +++ b/src/be/nikiroo/fanfix/output/Cbz.java @@ -73,7 +73,7 @@ class Cbz extends BasicOutput { new FileOutputStream(new File(dir, "URL")), "UTF-8")); try { if (meta != null) { - writer.write(meta.getUuid()); + writer.write(meta.getUrl()); } } finally { writer.close(); diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport.java b/src/be/nikiroo/fanfix/supported/BasicSupport.java index 4314b99..8154a15 100644 --- a/src/be/nikiroo/fanfix/supported/BasicSupport.java +++ b/src/be/nikiroo/fanfix/supported/BasicSupport.java @@ -214,8 +214,8 @@ public abstract class BasicSupport { /** * Open an input link that will be used for the support. *

- * Can return NULL, in which case you are supposed to work without an - * {@link InputStream}. + * Can return NULL, in which case you are supposed to work without a source + * node. * * @param source * the source {@link URL} @@ -237,20 +237,9 @@ public abstract class BasicSupport { * @throws IOException * in case of I/O error */ - @SuppressWarnings("unused") protected void login() throws IOException { } - /** - * Prepare the support if needed before processing. - * - * @throws IOException - * on I/O error - */ - @SuppressWarnings("unused") - protected void preprocess() throws IOException { - } - /** * Now that we have processed the {@link Story}, close the resources if any. */ @@ -267,10 +256,9 @@ public abstract class BasicSupport { * @throws IOException * in case of I/O error */ - public Story processMeta() throws IOException { + public final Story processMeta() throws IOException { Story story = null; - preprocess(); try { story = processMeta(false, null); } finally { @@ -332,6 +320,32 @@ public abstract class BasicSupport { return story; } + /** + * Actual processing step, without the calls to other methods. + *

+ * Will convert the story resource into a fully filled {@link Story} object. + * + * @param pg + * the optional progress reporter + * + * @return the {@link Story}, never NULL + * + * @throws IOException + * in case of I/O error + */ + // TODO: add final + public Story process(Progress pg) throws IOException { + setCurrentReferer(source); + login(); + sourceNode = loadDocument(source); + + try { + return doProcess(pg); + } finally { + close(); + } + } + /** * Process the given story resource into a fully filled {@link Story} * object. @@ -344,87 +358,78 @@ public abstract class BasicSupport { * @throws IOException * in case of I/O error */ - public Story process(Progress pg) throws IOException { + public Story doProcess(Progress pg) throws IOException { if (pg == null) { pg = new Progress(); } else { pg.setMinMax(0, 100); } - setCurrentReferer(source); - login(); - sourceNode = loadDocument(source); - pg.setProgress(1); - try { - Progress pgMeta = new Progress(); - pg.addProgress(pgMeta, 10); - preprocess(); - Story story = processMeta(true, pgMeta); - if (!pgMeta.isDone()) { - pgMeta.setProgress(pgMeta.getMax()); // 10% - } + Progress pgMeta = new Progress(); + pg.addProgress(pgMeta, 10); + Story story = processMeta(true, pgMeta); + if (!pgMeta.isDone()) { + pgMeta.setProgress(pgMeta.getMax()); // 10% + } - pg.setName("Retrieving " + story.getMeta().getTitle()); + pg.setName("Retrieving " + story.getMeta().getTitle()); - Progress pgGetChapters = new Progress(); - pg.addProgress(pgGetChapters, 10); - story.setChapters(new ArrayList()); - List> chapters = getChapters(pgGetChapters); - if (!pgGetChapters.isDone()) { - pgGetChapters.setProgress(pgGetChapters.getMax()); // 20% - } + Progress pgGetChapters = new Progress(); + pg.addProgress(pgGetChapters, 10); + story.setChapters(new ArrayList()); + List> chapters = getChapters(pgGetChapters); + if (!pgGetChapters.isDone()) { + pgGetChapters.setProgress(pgGetChapters.getMax()); // 20% + } + + if (chapters != null) { + Progress pgChaps = new Progress("Extracting chapters", 0, + chapters.size() * 300); + pg.addProgress(pgChaps, 80); + + long words = 0; + int i = 1; + for (Entry chap : chapters) { + pgChaps.setName("Extracting chapter " + i); + URL chapUrl = chap.getValue(); + String chapName = chap.getKey(); + if (chapUrl != null) { + setCurrentReferer(chapUrl); + } + + pgChaps.setProgress(i * 100); + Progress pgGetChapterContent = new Progress(); + Progress pgMakeChapter = new Progress(); + pgChaps.addProgress(pgGetChapterContent, 100); + pgChaps.addProgress(pgMakeChapter, 100); + + String content = getChapterContent(chapUrl, i, + pgGetChapterContent); + if (!pgGetChapterContent.isDone()) { + pgGetChapterContent.setProgress(pgGetChapterContent + .getMax()); + } - if (chapters != null) { - Progress pgChaps = new Progress("Extracting chapters", 0, - chapters.size() * 300); - pg.addProgress(pgChaps, 80); - - long words = 0; - int i = 1; - for (Entry chap : chapters) { - pgChaps.setName("Extracting chapter " + i); - URL chapUrl = chap.getValue(); - String chapName = chap.getKey(); - if (chapUrl != null) { - setCurrentReferer(chapUrl); - } - - pgChaps.setProgress(i * 100); - Progress pgGetChapterContent = new Progress(); - Progress pgMakeChapter = new Progress(); - pgChaps.addProgress(pgGetChapterContent, 100); - pgChaps.addProgress(pgMakeChapter, 100); - - String content = getChapterContent(chapUrl, i, - pgGetChapterContent); - if (!pgGetChapterContent.isDone()) { - pgGetChapterContent.setProgress(pgGetChapterContent - .getMax()); - } - - Chapter cc = BasicSupportPara.makeChapter(this, chapUrl, i, - chapName, content, isHtml(), pgMakeChapter); - if (!pgMakeChapter.isDone()) { - pgMakeChapter.setProgress(pgMakeChapter.getMax()); - } - - words += cc.getWords(); - story.getChapters().add(cc); - story.getMeta().setWords(words); - - i++; + Chapter cc = BasicSupportPara.makeChapter(this, chapUrl, i, + chapName, content, isHtml(), pgMakeChapter); + if (!pgMakeChapter.isDone()) { + pgMakeChapter.setProgress(pgMakeChapter.getMax()); } - pgChaps.setName("Extracting chapters"); - } else { - pg.setProgress(80); + words += cc.getWords(); + story.getChapters().add(cc); + story.getMeta().setWords(words); + + i++; } - return story; - } finally { - close(); + pgChaps.setName("Extracting chapters"); + } else { + pg.setProgress(80); } + + return story; } /** diff --git a/src/be/nikiroo/fanfix/supported/BasicSupportImages.java b/src/be/nikiroo/fanfix/supported/BasicSupportImages.java new file mode 100644 index 0000000..85b79c7 --- /dev/null +++ b/src/be/nikiroo/fanfix/supported/BasicSupportImages.java @@ -0,0 +1,161 @@ +package be.nikiroo.fanfix.supported; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.net.MalformedURLException; +import java.net.URL; + +import be.nikiroo.fanfix.Instance; +import be.nikiroo.utils.Image; + +public class BasicSupportImages { + /** + * Check if the given resource can be a local image or a remote image, then + * refresh the cache with it if it is. + * + * @param dir + * the local directory to search, if any + * @param line + * the resource to check + * + * @return the image if found, or NULL + * + */ + static Image getImage(BasicSupport support, File dir, String line) { + URL url = getImageUrl(support, dir, line); + if (url != null) { + if ("file".equals(url.getProtocol())) { + if (new File(url.getPath()).isDirectory()) { + return null; + } + } + InputStream in = null; + try { + in = Instance.getCache().open(url, support, true); + return new Image(in); + } catch (IOException e) { + } finally { + if (in != null) { + try { + in.close(); + } catch (IOException e) { + } + } + } + } + + return null; + } + + /** + * Check if the given resource can be a local image or a remote image, then + * refresh the cache with it if it is. + * + * @param dir + * the local directory to search, if any + * @param line + * the resource to check + * + * @return the image URL if found, or NULL + * + */ + static URL getImageUrl(BasicSupport support, File dir, String line) { + URL url = null; + + if (line != null) { + // try for files + if (dir != null && dir.exists() && !dir.isFile()) { + try { + + String relPath = null; + String absPath = null; + try { + relPath = new File(dir, line.trim()).getAbsolutePath(); + } catch (Exception e) { + // Cannot be converted to path (one possibility to take + // into account: absolute path on Windows) + } + try { + absPath = new File(line.trim()).getAbsolutePath(); + } catch (Exception e) { + // Cannot be converted to path (at all) + } + + for (String ext : getImageExt(true)) { + File absFile = new File(absPath + ext); + File relFile = new File(relPath + ext); + if (absPath != null && absFile.exists() + && absFile.isFile()) { + url = absFile.toURI().toURL(); + } else if (relPath != null && relFile.exists() + && relFile.isFile()) { + url = relFile.toURI().toURL(); + } + } + } catch (Exception e) { + // Should not happen since we control the correct arguments + } + } + + if (url == null) { + // try for URLs + try { + for (String ext : getImageExt(true)) { + if (Instance.getCache() + .check(new URL(line + ext), true)) { + url = new URL(line + ext); + break; + } + } + + // try out of cache + if (url == null) { + for (String ext : getImageExt(true)) { + try { + url = new URL(line + ext); + Instance.getCache().refresh(url, support, true); + break; + } catch (IOException e) { + // no image with this ext + url = null; + } + } + } + } catch (MalformedURLException e) { + // Not an url + } + } + + // refresh the cached file + if (url != null) { + try { + Instance.getCache().refresh(url, support, true); + } catch (IOException e) { + // woops, broken image + url = null; + } + } + } + + return url; + } + + /** + * Return the list of supported image extensions. + * + * @param emptyAllowed + * TRUE to allow an empty extension on first place, which can be + * used when you may already have an extension in your input but + * are not sure about it + * + * @return the extensions + */ + static String[] getImageExt(boolean emptyAllowed) { + if (emptyAllowed) { + return new String[] { "", ".png", ".jpg", ".jpeg", ".gif", ".bmp" }; + } + + return new String[] { ".png", ".jpg", ".jpeg", ".gif", ".bmp" }; + } +} diff --git a/src/be/nikiroo/fanfix/supported/BasicSupport_Deprecated.java b/src/be/nikiroo/fanfix/supported/BasicSupport_Deprecated.java index 591ba58..e22724a 100644 --- a/src/be/nikiroo/fanfix/supported/BasicSupport_Deprecated.java +++ b/src/be/nikiroo/fanfix/supported/BasicSupport_Deprecated.java @@ -75,7 +75,6 @@ public abstract class BasicSupport_Deprecated extends BasicSupport { throw new RuntimeException("should not be used by legacy code"); } - @Override public Story process(Progress pg) throws IOException { return process(getSource(), pg); } diff --git a/src/be/nikiroo/fanfix/supported/Cbz.java b/src/be/nikiroo/fanfix/supported/Cbz.java index f635a17..948a2d6 100644 --- a/src/be/nikiroo/fanfix/supported/Cbz.java +++ b/src/be/nikiroo/fanfix/supported/Cbz.java @@ -1,7 +1,9 @@ package be.nikiroo.fanfix.supported; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; +import java.io.InputStream; import java.net.URL; import java.util.ArrayList; import java.util.Collections; @@ -13,10 +15,12 @@ import java.util.zip.ZipInputStream; import be.nikiroo.fanfix.Instance; import be.nikiroo.fanfix.data.Chapter; +import be.nikiroo.fanfix.data.MetaData; import be.nikiroo.fanfix.data.Paragraph; import be.nikiroo.fanfix.data.Story; import be.nikiroo.utils.IOUtils; import be.nikiroo.utils.Image; +import be.nikiroo.utils.MarkableFileInputStream; import be.nikiroo.utils.Progress; /** @@ -57,7 +61,7 @@ class Cbz extends Epub { } @Override - public Story process(URL url, Progress pg) throws IOException { + public Story doProcess(Progress pg) throws IOException { if (pg == null) { pg = new Progress(); } else { @@ -66,22 +70,28 @@ class Cbz extends Epub { Progress pgMeta = new Progress(); pg.addProgress(pgMeta, 10); - Story story = processMeta(url, false, true, pgMeta); + Story story = processMeta(true, pgMeta); + MetaData meta = story.getMeta(); + pgMeta.done(); // 10% File tmpDir = Instance.getTempFiles().createTempDir("info-text"); String basename = null; Map images = new HashMap(); + InputStream cbzIn = null; + ZipInputStream zipIn = null; try { - ZipInputStream zipIn = new ZipInputStream(getInput()); + cbzIn = new MarkableFileInputStream(new FileInputStream( + getSourceFileOriginal())); + zipIn = new ZipInputStream(cbzIn); for (ZipEntry entry = zipIn.getNextEntry(); entry != null; entry = zipIn .getNextEntry()) { if (!entry.isDirectory() && entry.getName().startsWith(getDataPrefix())) { String entryLName = entry.getName().toLowerCase(); boolean imageEntry = false; - for (String ext : getImageExt(false)) { + for (String ext : BasicSupportImages.getImageExt(false)) { if (entryLName.endsWith(ext)) { imageEntry = true; } @@ -116,23 +126,12 @@ class Cbz extends Epub { pg.setProgress(90); - File txt = new File(tmpDir, basename + ".txt"); - if (!txt.exists()) { - basename = null; - } - if (basename != null) { - try { - BasicSupport support = BasicSupport.getSupport(txt.toURI() - .toURL()); - Story origStory = support.process(null); - story.setChapters(origStory.getChapters()); - story.setMeta(origStory.getMeta()); - } catch (Exception e) { - basename = null; - } - } - - if (basename == null) { + // include original story + Story origStory = getStoryFromTxt(tmpDir, basename); + if (origStory != null) { + story.setChapters(origStory.getChapters()); + story.setMeta(origStory.getMeta()); + } else { story.setChapters(new ArrayList()); } @@ -157,9 +156,36 @@ class Cbz extends Epub { } finally { IOUtils.deltree(tmpDir); + if (zipIn != null) { + zipIn.close(); + } + if (cbzIn != null) { + cbzIn.close(); + } } pg.setProgress(100); return story; } + + private Story getStoryFromTxt(File tmpDir, String basename) { + Story origStory = null; + + File txt = new File(tmpDir, basename + ".txt"); + if (!txt.exists()) { + basename = null; + } + if (basename != null) { + try { + BasicSupport support = BasicSupport.getSupport(txt.toURI() + .toURL()); + origStory = support.process(null); + } catch (Exception e) { + basename = null; + } + } + + return origStory; + + } } diff --git a/src/be/nikiroo/fanfix/supported/Epub.java b/src/be/nikiroo/fanfix/supported/Epub.java index 794998e..5a1fe43 100644 --- a/src/be/nikiroo/fanfix/supported/Epub.java +++ b/src/be/nikiroo/fanfix/supported/Epub.java @@ -4,20 +4,20 @@ import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.net.URISyntaxException; import java.net.URL; import java.net.URLDecoder; import java.util.ArrayList; -import java.util.List; -import java.util.Map.Entry; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; +import org.jsoup.nodes.Document; + import be.nikiroo.fanfix.Instance; import be.nikiroo.fanfix.data.MetaData; import be.nikiroo.utils.IOUtils; import be.nikiroo.utils.Image; import be.nikiroo.utils.MarkableFileInputStream; -import be.nikiroo.utils.Progress; import be.nikiroo.utils.StringUtils; /** @@ -27,9 +27,8 @@ import be.nikiroo.utils.StringUtils; * @author niki */ class Epub extends InfoText { - protected MetaData meta; + private MetaData meta; private File tmpDir; - private File tmp; private String desc; private URL fakeSource; @@ -40,166 +39,185 @@ class Epub extends InfoText { return "epub"; } - @Override - protected boolean supports(URL url) { - if (url.getPath().toLowerCase().endsWith(".epub")) { - return true; - } - - return false; - } - - @Override - protected MetaData getMeta(URL source, InputStream in) throws IOException { - return meta; + public File getSourceFileOriginal() { + return super.getSourceFile(); } @Override - protected String getDesc(URL source, InputStream in) throws IOException { - if (desc != null) { - return desc; - } - - if (fakeIn != null) { - fakeIn.reset(); - return super.getDesc(fakeSource, fakeIn); + protected File getSourceFile() { + try { + return new File(fakeSource.toURI()); + } catch (URISyntaxException e) { + Instance.getTraceHandler() + .error(new IOException( + "Cannot get the source file from the info-text URL", + e)); } return null; } @Override - protected List> getChapters(URL source, InputStream in, - Progress pg) throws IOException { + protected InputStream getInput() { if (fakeIn != null) { - fakeIn.reset(); - return super.getChapters(fakeSource, fakeIn, pg); + try { + fakeIn.reset(); + } catch (IOException e) { + Instance.getTraceHandler() + .error(new IOException( + "Cannot reset the Epub Text stream", e)); + } + + return fakeIn; } return null; } @Override - protected String getChapterContent(URL source, InputStream in, int number, - Progress pg) throws IOException { - if (fakeIn != null) { - fakeIn.reset(); - return super.getChapterContent(fakeSource, fakeIn, number, pg); - } + protected boolean supports(URL url) { + return url.getPath().toLowerCase().endsWith(".epub"); + } - return null; + @Override + protected MetaData getMeta() throws IOException { + return meta; } @Override - protected void preprocess(URL source, InputStream in) throws IOException { - // Note: do NOT close this stream, as it would also close "in" - ZipInputStream zipIn = new ZipInputStream(in); - tmpDir = Instance.getTempFiles().createTempDir("fanfic-reader-parser"); - tmp = new File(tmpDir, "file.txt"); - File tmpInfo = new File(tmpDir, "file.info"); - fakeSource = tmp.toURI().toURL(); - Image cover = null; - - String url = source.toString(); - String title = null; - String author = null; - - for (ZipEntry entry = zipIn.getNextEntry(); entry != null; entry = zipIn - .getNextEntry()) { - if (!entry.isDirectory() - && entry.getName().startsWith(getDataPrefix())) { - String entryLName = entry.getName().toLowerCase(); - - boolean imageEntry = false; - for (String ext : getImageExt(false)) { - if (entryLName.endsWith(ext)) { - imageEntry = true; + protected Document loadDocument(URL source) throws IOException { + super.loadDocument(source); // prepares super.getSourceFile() and + // super.getInput() + + InputStream in = super.getInput(); + ZipInputStream zipIn = null; + try { + zipIn = new ZipInputStream(in); + tmpDir = Instance.getTempFiles().createTempDir( + "fanfic-reader-parser"); + File tmp = new File(tmpDir, "file.txt"); + File tmpInfo = new File(tmpDir, "file.info"); + + fakeSource = tmp.toURI().toURL(); + Image cover = null; + + String url; + try { + url = getSource().toURI().toURL().toString(); + } catch (URISyntaxException e1) { + url = getSource().toString(); + } + String title = null; + String author = null; + + for (ZipEntry entry = zipIn.getNextEntry(); entry != null; entry = zipIn + .getNextEntry()) { + if (!entry.isDirectory() + && entry.getName().startsWith(getDataPrefix())) { + String entryLName = entry.getName().toLowerCase(); + + boolean imageEntry = false; + for (String ext : BasicSupportImages.getImageExt(false)) { + if (entryLName.endsWith(ext)) { + imageEntry = true; + } } - } - if (entry.getName().equals(getDataPrefix() + "version")) { - // Nothing to do for now ("first" - // version is 3.0) - } else if (entryLName.endsWith(".info")) { - // Info file - IOUtils.write(zipIn, tmpInfo); - } else if (imageEntry) { - // Cover - if (getCover()) { - try { - cover = new Image(zipIn); - } catch (Exception e) { - Instance.getTraceHandler().error(e); + if (entry.getName().equals(getDataPrefix() + "version")) { + // Nothing to do for now ("first" + // version is 3.0) + } else if (entryLName.endsWith(".info")) { + // Info file + IOUtils.write(zipIn, tmpInfo); + } else if (imageEntry) { + // Cover + if (getCover()) { + try { + cover = new Image(zipIn); + } catch (Exception e) { + Instance.getTraceHandler().error(e); + } } - } - } else if (entry.getName().equals(getDataPrefix() + "URL")) { - String[] descArray = StringUtils - .unhtml(IOUtils.readSmallStream(zipIn)).trim() - .split("\n"); - if (descArray.length > 0) { - url = descArray[0].trim(); - } - } else if (entry.getName().equals(getDataPrefix() + "SUMMARY")) { - String[] descArray = StringUtils - .unhtml(IOUtils.readSmallStream(zipIn)).trim() - .split("\n"); - int skip = 0; - if (descArray.length > 1) { - title = descArray[0].trim(); - skip = 1; - if (descArray.length > 2 - && descArray[1].startsWith("©")) { - author = descArray[1].substring(1).trim(); - skip = 2; + } else if (entry.getName().equals(getDataPrefix() + "URL")) { + String[] descArray = StringUtils + .unhtml(IOUtils.readSmallStream(zipIn)).trim() + .split("\n"); + if (descArray.length > 0) { + url = descArray[0].trim(); + } + } else if (entry.getName().equals( + getDataPrefix() + "SUMMARY")) { + String[] descArray = StringUtils + .unhtml(IOUtils.readSmallStream(zipIn)).trim() + .split("\n"); + int skip = 0; + if (descArray.length > 1) { + title = descArray[0].trim(); + skip = 1; + if (descArray.length > 2 + && descArray[1].startsWith("©")) { + author = descArray[1].substring(1).trim(); + skip = 2; + } + } + this.desc = ""; + for (int i = skip; i < descArray.length; i++) { + this.desc += descArray[i].trim() + "\n"; } - } - this.desc = ""; - for (int i = skip; i < descArray.length; i++) { - this.desc += descArray[i].trim() + "\n"; - } - this.desc = this.desc.trim(); - } else { - // Hopefully the data file - IOUtils.write(zipIn, tmp); + this.desc = this.desc.trim(); + } else { + // Hopefully the data file + IOUtils.write(zipIn, tmp); + } } } - } - - if (requireInfo() && (!tmp.exists() || !tmpInfo.exists())) { - throw new IOException( - "file not supported (maybe not created with this program or corrupt)"); - } - if (tmp.exists()) { - this.fakeIn = new MarkableFileInputStream(new FileInputStream(tmp)); - } + if (requireInfo() && (!tmp.exists() || !tmpInfo.exists())) { + throw new IOException( + "file not supported (maybe not created with this program or corrupt)"); + } - if (tmpInfo.exists()) { - meta = InfoReader.readMeta(tmpInfo, true); - if (cover != null) { - meta.setCover(cover); + if (tmp.exists()) { + this.fakeIn = new MarkableFileInputStream(new FileInputStream( + tmp)); } - tmpInfo.delete(); - } else { - if (title == null || title.isEmpty()) { - title = new File(source.getPath()).getName(); - if (title.toLowerCase().endsWith(".cbz")) { - title = title.substring(0, title.length() - 4); + + if (tmpInfo.exists()) { + meta = InfoReader.readMeta(tmpInfo, true); + if (cover != null) { + meta.setCover(cover); + } + tmpInfo.delete(); + } else { + if (title == null || title.isEmpty()) { + title = getSourceFileOriginal().getName(); + if (title.toLowerCase().endsWith(".cbz")) { + title = title.substring(0, title.length() - 4); + } + title = URLDecoder.decode(title, "UTF-8").trim(); } - title = URLDecoder.decode(title, "UTF-8").trim(); - } - meta = new MetaData(); - meta.setLang("en"); - meta.setTags(new ArrayList()); - meta.setSource(getSourceName()); - meta.setUuid(url); - meta.setUrl(url); - meta.setTitle(title); - meta.setAuthor(author); - meta.setImageDocument(isImagesDocumentByDefault()); + meta = new MetaData(); + meta.setLang("en"); + meta.setTags(new ArrayList()); + meta.setSource(getSourceName()); + meta.setUuid(url); + meta.setUrl(url); + meta.setTitle(title); + meta.setAuthor(author); + meta.setImageDocument(isImagesDocumentByDefault()); + } + } finally { + if (zipIn != null) { + zipIn.close(); + } + if (in != null) { + in.close(); + } } + + return null; } @Override @@ -209,15 +227,6 @@ class Epub extends InfoText { } tmpDir = null; - tmp = null; - - if (fakeIn != null) { - try { - fakeIn.close(); - } catch (Exception e) { - Instance.getTraceHandler().error(e); - } - } super.close(); } diff --git a/src/be/nikiroo/fanfix/supported/InfoReader.java b/src/be/nikiroo/fanfix/supported/InfoReader.java index 5203cc8..8e1c385 100644 --- a/src/be/nikiroo/fanfix/supported/InfoReader.java +++ b/src/be/nikiroo/fanfix/supported/InfoReader.java @@ -8,6 +8,7 @@ import java.io.InputStream; import java.net.URL; import java.util.ArrayList; import java.util.List; +import java.util.Scanner; import be.nikiroo.fanfix.Instance; import be.nikiroo.fanfix.bundles.Config; @@ -29,7 +30,6 @@ public class InfoReader { return createMeta(infoFile.toURI().toURL(), in, withCover); } finally { in.close(); - in = null; } } @@ -138,7 +138,7 @@ public class InfoReader { if (in != null) { in.reset(); - String value = BasicSupport_Deprecated.getLine(in, key, 0); + String value = getLine(in, key, 0); if (value != null && !value.isEmpty()) { value = value.trim().substring(key.length() - 1).trim(); if (value.startsWith("'") && value.endsWith("'") @@ -152,4 +152,81 @@ public class InfoReader { return null; } + + /** + * Return the first line from the given input which correspond to the given + * selectors. + * + * @param in + * the input + * @param needle + * a string that must be found inside the target line (also + * supports "^" at start to say "only if it starts with" the + * needle) + * @param relativeLine + * the line to return based upon the target line position (-1 = + * the line before, 0 = the target line...) + * + * @return the line + */ + static private String getLine(InputStream in, String needle, + int relativeLine) { + return getLine(in, needle, relativeLine, true); + } + + /** + * Return a line from the given input which correspond to the given + * selectors. + * + * @param in + * the input + * @param needle + * a string that must be found inside the target line (also + * supports "^" at start to say "only if it starts with" the + * needle) + * @param relativeLine + * the line to return based upon the target line position (-1 = + * the line before, 0 = the target line...) + * @param first + * takes the first result (as opposed to the last one, which will + * also always spend the input) + * + * @return the line + */ + static private String getLine(InputStream in, String needle, + int relativeLine, boolean first) { + String rep = null; + + List lines = new ArrayList(); + @SuppressWarnings("resource") + Scanner scan = new Scanner(in, "UTF-8"); + int index = -1; + scan.useDelimiter("\\n"); + while (scan.hasNext()) { + lines.add(scan.next()); + + if (index == -1) { + if (needle.startsWith("^")) { + if (lines.get(lines.size() - 1).startsWith( + needle.substring(1))) { + index = lines.size() - 1; + } + + } else { + if (lines.get(lines.size() - 1).contains(needle)) { + index = lines.size() - 1; + } + } + } + + if (index >= 0 && index + relativeLine < lines.size()) { + rep = lines.get(index + relativeLine); + if (first) { + break; + } + } + } + + return rep; + } } diff --git a/src/be/nikiroo/fanfix/supported/InfoText.java b/src/be/nikiroo/fanfix/supported/InfoText.java index 786e771..37f447a 100644 --- a/src/be/nikiroo/fanfix/supported/InfoText.java +++ b/src/be/nikiroo/fanfix/supported/InfoText.java @@ -2,8 +2,6 @@ package be.nikiroo.fanfix.supported; import java.io.File; import java.io.IOException; -import java.io.InputStream; -import java.net.URISyntaxException; import java.net.URL; import be.nikiroo.fanfix.data.MetaData; @@ -23,40 +21,36 @@ class InfoText extends Text { return "info-text"; } - @Override - protected MetaData getMeta(URL source, InputStream in) throws IOException { - try { - File sourceFile = new File(source.toURI()); - sourceFile = assureNoTxt(sourceFile); - - MetaData meta = InfoReader.readMeta(new File(sourceFile.getPath() - + ".info"), true); + protected File getInfoFile() { + return new File(assureNoTxt(getSourceFile()).getPath() + ".info"); + } - // Some old .info files don't have those now required fields... - String test = meta.getTitle() == null ? "" : meta.getTitle(); - test += meta.getAuthor() == null ? "" : meta.getAuthor(); - test += meta.getDate() == null ? "" : meta.getDate(); - test += meta.getUrl() == null ? "" : meta.getUrl(); - if (test.isEmpty()) { - MetaData superMeta = super.getMeta(source, reset(in)); - if (meta.getTitle() == null || meta.getTitle().isEmpty()) { - meta.setTitle(superMeta.getTitle()); - } - if (meta.getAuthor() == null || meta.getAuthor().isEmpty()) { - meta.setAuthor(superMeta.getAuthor()); - } - if (meta.getDate() == null || meta.getDate().isEmpty()) { - meta.setDate(superMeta.getDate()); - } - if (meta.getUrl() == null || meta.getUrl().isEmpty()) { - meta.setUrl(superMeta.getUrl()); - } + @Override + protected MetaData getMeta() throws IOException { + MetaData meta = InfoReader.readMeta(getInfoFile(), true); + + // Some old .info files don't have those now required fields... + String test = meta.getTitle() == null ? "" : meta.getTitle(); + test += meta.getAuthor() == null ? "" : meta.getAuthor(); + test += meta.getDate() == null ? "" : meta.getDate(); + test += meta.getUrl() == null ? "" : meta.getUrl(); + if (test.isEmpty()) { + MetaData superMeta = super.getMeta(); + if (meta.getTitle() == null || meta.getTitle().isEmpty()) { + meta.setTitle(superMeta.getTitle()); + } + if (meta.getAuthor() == null || meta.getAuthor().isEmpty()) { + meta.setAuthor(superMeta.getAuthor()); + } + if (meta.getDate() == null || meta.getDate().isEmpty()) { + meta.setDate(superMeta.getDate()); + } + if (meta.getUrl() == null || meta.getUrl().isEmpty()) { + meta.setUrl(superMeta.getUrl()); } - - return meta; - } catch (URISyntaxException e) { - throw new IOException("Cannot parse URL to file: " + source, e); } + + return meta; } @Override diff --git a/src/be/nikiroo/fanfix/supported/Text.java b/src/be/nikiroo/fanfix/supported/Text.java index a610594..f6803cd 100644 --- a/src/be/nikiroo/fanfix/supported/Text.java +++ b/src/be/nikiroo/fanfix/supported/Text.java @@ -1,19 +1,24 @@ package be.nikiroo.fanfix.supported; import java.io.File; +import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.net.URISyntaxException; import java.net.URL; +import java.util.AbstractMap; import java.util.ArrayList; import java.util.List; import java.util.Map.Entry; import java.util.Scanner; +import org.jsoup.nodes.Document; + import be.nikiroo.fanfix.Instance; import be.nikiroo.fanfix.bundles.Config; import be.nikiroo.fanfix.data.MetaData; import be.nikiroo.utils.Image; +import be.nikiroo.utils.MarkableFileInputStream; import be.nikiroo.utils.Progress; /** @@ -33,7 +38,29 @@ import be.nikiroo.utils.Progress; * * @author niki */ -class Text extends BasicSupport_Deprecated { +class Text extends BasicSupport { + private File sourceFile; + private InputStream in; + + protected File getSourceFile() { + return sourceFile; + } + + protected InputStream getInput() { + if (in != null) { + try { + in.reset(); + } catch (IOException e) { + Instance.getTraceHandler().error( + new IOException("Cannot reset the Text stream", e)); + } + + return in; + } + + return null; + } + @Override protected boolean isHtml() { return false; @@ -45,41 +72,42 @@ class Text extends BasicSupport_Deprecated { } @Override - protected MetaData getMeta(URL source, InputStream in) throws IOException { + protected Document loadDocument(URL source) throws IOException { + try { + sourceFile = new File(source.toURI()); + in = new MarkableFileInputStream(new FileInputStream(sourceFile)); + } catch (URISyntaxException e) { + throw new IOException("Cannot load the text document: " + source); + } + + return null; + } + + @Override + protected MetaData getMeta() throws IOException { MetaData meta = new MetaData(); - meta.setTitle(getTitle(reset(in))); - meta.setAuthor(getAuthor(reset(in))); - meta.setDate(getDate(reset(in))); + meta.setTitle(getTitle()); + meta.setAuthor(getAuthor()); + meta.setDate(getDate()); meta.setTags(new ArrayList()); meta.setSource(getSourceName()); - meta.setUrl(source.toString()); + meta.setUrl(getSourceFile().toURI().toURL().toString()); meta.setPublisher(""); - meta.setUuid(source.toString()); + meta.setUuid(getSourceFile().toString()); meta.setLuid(""); - meta.setLang(getLang(reset(in))); // default is EN - meta.setSubject(getSubject(source)); + meta.setLang(getLang()); // default is EN + meta.setSubject(getSourceFile().getParentFile().getName()); meta.setType(getType().toString()); meta.setImageDocument(false); - meta.setCover(getCover(source)); + meta.setCover(getCover(getSourceFile())); return meta; } - private String getSubject(URL source) throws IOException { - try { - File file = new File(source.toURI()); - return file.getParentFile().getName(); - } catch (URISyntaxException e) { - throw new IOException("Cannot parse the URL to File: " - + source.toString(), e); - } - - } - - private String getLang(InputStream in) { + private String getLang() { @SuppressWarnings("resource") - Scanner scan = new Scanner(in, "UTF-8"); + Scanner scan = new Scanner(getInput(), "UTF-8"); scan.useDelimiter("\\n"); scan.next(); // Title scan.next(); // Author (Date) @@ -103,16 +131,16 @@ class Text extends BasicSupport_Deprecated { return lang; } - private String getTitle(InputStream in) { + private String getTitle() { @SuppressWarnings("resource") - Scanner scan = new Scanner(in, "UTF-8"); + Scanner scan = new Scanner(getInput(), "UTF-8"); scan.useDelimiter("\\n"); return scan.next(); } - private String getAuthor(InputStream in) { + private String getAuthor() { @SuppressWarnings("resource") - Scanner scan = new Scanner(in, "UTF-8"); + Scanner scan = new Scanner(getInput(), "UTF-8"); scan.useDelimiter("\\n"); scan.next(); String authorDate = scan.next(); @@ -126,9 +154,9 @@ class Text extends BasicSupport_Deprecated { return BasicSupportHelper.fixAuthor(author); } - private String getDate(InputStream in) { + private String getDate() { @SuppressWarnings("resource") - Scanner scan = new Scanner(in, "UTF-8"); + Scanner scan = new Scanner(getInput(), "UTF-8"); scan.useDelimiter("\\n"); scan.next(); String authorDate = scan.next(); @@ -147,18 +175,12 @@ class Text extends BasicSupport_Deprecated { } @Override - protected String getDesc(URL source, InputStream in) throws IOException { - return getChapterContent(source, in, 0, null); + protected String getDesc() throws IOException { + return getChapterContent(null, 0, null); } - private Image getCover(URL source) { - String path; - try { - path = new File(source.toURI()).getPath(); - } catch (URISyntaxException e) { - Instance.getTraceHandler().error(e); - path = null; - } + private Image getCover(File sourceFile) { + String path = sourceFile.getName(); for (String ext : new String[] { ".txt", ".text", ".story" }) { if (path.endsWith(ext)) { @@ -166,15 +188,16 @@ class Text extends BasicSupport_Deprecated { } } - return getImage(this, source, path); + return BasicSupportImages.getImage(this, sourceFile.getParentFile(), + path); } @Override - protected List> getChapters(URL source, InputStream in, - Progress pg) throws IOException { + protected List> getChapters(Progress pg) + throws IOException { List> chaps = new ArrayList>(); @SuppressWarnings("resource") - Scanner scan = new Scanner(in, "UTF-8"); + Scanner scan = new Scanner(getInput(), "UTF-8"); scan.useDelimiter("\\n"); boolean prevLineEmpty = false; while (scan.hasNext()) { @@ -185,24 +208,10 @@ class Text extends BasicSupport_Deprecated { if (pos >= 0 && pos + 1 < line.length()) { chapName = line.substring(pos + 1).trim(); } - final URL value = source; - final String key = chapName; - chaps.add(new Entry() { - @Override - public URL setValue(URL value) { - return null; - } - @Override - public URL getValue() { - return value; - } - - @Override - public String getKey() { - return key; - } - }); + chaps.add(new AbstractMap.SimpleEntry(// + chapName, // + getSourceFile().toURI().toURL())); } prevLineEmpty = line.trim().isEmpty(); @@ -212,11 +221,11 @@ class Text extends BasicSupport_Deprecated { } @Override - protected String getChapterContent(URL source, InputStream in, int number, - Progress pg) throws IOException { + protected String getChapterContent(URL source, int number, Progress pg) + throws IOException { StringBuilder builder = new StringBuilder(); @SuppressWarnings("resource") - Scanner scan = new Scanner(in, "UTF-8"); + Scanner scan = new Scanner(getInput(), "UTF-8"); scan.useDelimiter("\\n"); boolean inChap = false; while (scan.hasNext()) { @@ -234,6 +243,22 @@ class Text extends BasicSupport_Deprecated { return builder.toString(); } + @Override + protected void close() { + InputStream in = getInput(); + if (in != null) { + try { + in.close(); + } catch (IOException e) { + Instance.getTraceHandler().error( + new IOException( + "Cannot close the text source file input", e)); + } + } + + super.close(); + } + @Override protected boolean supports(URL url) { return supports(url, false); @@ -296,7 +321,7 @@ class Text extends BasicSupport_Deprecated { * * @return the language or NULL */ - private String detectChapter(String line, int number) { + static private String detectChapter(String line, int number) { line = line.toUpperCase(); for (String lang : Instance.getConfig().getString(Config.CHAPTER) .split(",")) { diff --git a/test/expected/cbz.cbz b/test/expected/cbz.cbz index 569c395..28cc25b 100644 Binary files a/test/expected/cbz.cbz and b/test/expected/cbz.cbz differ diff --git a/test/expected/epub.epub b/test/expected/epub.epub index 83d5236..776d05d 100644 Binary files a/test/expected/epub.epub and b/test/expected/epub.epub differ diff --git a/test/expected/html/html.info b/test/expected/html/html.info index 901736e..506e477 100644 --- a/test/expected/html/html.info +++ b/test/expected/html/html.info @@ -5,7 +5,7 @@ SUBJECT="test" SOURCE="text" URL="file:/media/xubuntu/sd32/workspace/fanfix/test/test.story" TAGS="" -UUID="file:/media/xubuntu/sd32/workspace/fanfix/test/test.story" +UUID="/media/xubuntu/sd32/workspace/fanfix/test/test.story" LUID="" LANG="en" IMAGES_DOCUMENT="false" @@ -14,5 +14,5 @@ COVER="" EPUBCREATOR="Fanfix (by Niki)" PUBLISHER="" WORDCOUNT="57" -CREATION_DATE="2018-03-24 09:27:09" +CREATION_DATE="2018-03-28 08:40:18" FAKE_COVER="false" diff --git a/test/expected/info_text.info b/test/expected/info_text.info index 5836ef9..cc522dd 100644 --- a/test/expected/info_text.info +++ b/test/expected/info_text.info @@ -5,7 +5,7 @@ SUBJECT="test" SOURCE="text" URL="file:/media/xubuntu/sd32/workspace/fanfix/test/test.story" TAGS="" -UUID="file:/media/xubuntu/sd32/workspace/fanfix/test/test.story" +UUID="/media/xubuntu/sd32/workspace/fanfix/test/test.story" LUID="" LANG="en" IMAGES_DOCUMENT="false" @@ -14,5 +14,5 @@ COVER="" EPUBCREATOR="Fanfix (by Niki)" PUBLISHER="" WORDCOUNT="57" -CREATION_DATE="2018-03-24 09:27:09" +CREATION_DATE="2018-03-28 08:39:39" FAKE_COVER="false"