[fanfix.git] / supported / Epub.java

package be.nikiroo.fanfix.supported;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Locale;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import org.jsoup.nodes.Document;

import be.nikiroo.fanfix.Instance;
import be.nikiroo.fanfix.data.MetaData;
import be.nikiroo.utils.IOUtils;
import be.nikiroo.utils.Image;
import be.nikiroo.utils.StringUtils;
import be.nikiroo.utils.streams.MarkableFileInputStream;

/**
 * Support class for EPUB files created with this program (as we need some
 * metadata available in those we create).
 * 
 * @author niki
 */
class Epub extends InfoText {
	private MetaData meta;
	private File tmpDir;
	private String desc;

	private URL fakeSource;
	private InputStream fakeIn;

	public File getSourceFileOriginal() {
		return super.getSourceFile();
	}

	@Override
	protected File getSourceFile() {
		try {
			return new File(fakeSource.toURI());
		} catch (URISyntaxException e) {
			Instance.getInstance().getTraceHandler().error(new IOException(
					"Cannot get the source file from the info-text URL", e));
		}

		return null;
	}

	@Override
	protected InputStream getInput() {
		if (fakeIn != null) {
			try {
				fakeIn.reset();
			} catch (IOException e) {
				Instance.getInstance().getTraceHandler().error(new IOException(
						"Cannot reset the Epub Text stream", e));
			}

			return fakeIn;
		}

		return null;
	}

	@Override
	protected boolean supports(URL url) {
		return url.getPath().toLowerCase().endsWith(".epub");
	}

	@Override
	protected MetaData getMeta() throws IOException {
		return meta;
	}

	@Override
	protected Document loadDocument(URL source) throws IOException {
		super.loadDocument(source); // prepares super.getSourceFile() and
									// super.getInput()

		InputStream in = super.getInput();
		ZipInputStream zipIn = null;
		try {
			zipIn = new ZipInputStream(in);
			tmpDir = Instance.getInstance().getTempFiles()
					.createTempDir("fanfic-reader-parser");
			File tmp = new File(tmpDir, "file.txt");
			File tmpInfo = new File(tmpDir, "file.info");

			fakeSource = tmp.toURI().toURL();
			Image cover = null;

			String url;
			try {
				url = getSource().toURI().toURL().toString();
			} catch (URISyntaxException e1) {
				url = getSource().toString();
			}
			String title = null;
			String author = null;

			for (ZipEntry entry = zipIn
					.getNextEntry(); entry != null; entry = zipIn
							.getNextEntry()) {
				if (!entry.isDirectory()
						&& entry.getName().startsWith(getDataPrefix())) {
					String entryLName = entry.getName().toLowerCase();
					entryLName = entryLName.substring(getDataPrefix().length());

					boolean imageEntry = false;
					for (String ext : bsImages.getImageExt(false)) {
						if (entryLName.endsWith(ext)) {
							imageEntry = true;
						}
					}

					if (entryLName.equals("version")) {
						// Nothing to do for now ("first"
						// version is 3.0)
					} else if (entryLName.endsWith(".info")) {
						// Info file
						IOUtils.write(zipIn, tmpInfo);
					} else if (imageEntry) {
						// Cover
						if (getCover() && cover == null) {
							try {
								Image img = new Image(zipIn);
								if (img.getSize() == 0) {
									img.close();
									throw new IOException(
											"Empty image not accepted");
								}
								cover = img;
							} catch (Exception e) {
								Instance.getInstance().getTraceHandler()
										.error(e);
							}
						}
					} else if (entryLName.equals("url")) {
						String[] descArray = StringUtils
								.unhtml(IOUtils.readSmallStream(zipIn)).trim()
								.split("\n");
						if (descArray.length > 0) {
							url = descArray[0].trim();
						}
					} else if (entryLName.endsWith(".desc")) {
						// // For old files
						// if (this.desc != null) {
						// this.desc = IOUtils.readSmallStream(zipIn).trim();
						// }
					} else if (entryLName.equals("summary")) {
						String[] descArray = StringUtils
								.unhtml(IOUtils.readSmallStream(zipIn)).trim()
								.split("\n");
						int skip = 0;
						if (descArray.length > 1) {
							title = descArray[0].trim();
							skip = 1;
							if (descArray.length > 2
									&& descArray[1].startsWith("©")) {
								author = descArray[1].substring(1).trim();
								skip = 2;
							}
						}
						// this.desc = "";
						// for (int i = skip; i < descArray.length; i++) {
						// this.desc += descArray[i].trim() + "\n";
						// }
						//
						// this.desc = this.desc.trim();
					} else {
						// Hopefully the data file
						IOUtils.write(zipIn, tmp);
					}
				}
			}

			if (requireInfo() && !tmp.exists()) {
				throw new IOException(
						"file not supported (maybe not created with this program or corrupt)");
			}

			if (tmp.exists()) {
				this.fakeIn = new MarkableFileInputStream(tmp);
			}

			if (tmpInfo.exists()) {
				meta = InfoReader.readMeta(tmpInfo, true);
				tmpInfo.delete();
			} else {
				if (title == null || title.isEmpty()) {
					title = getSourceFileOriginal().getName();
					String exts[] = new String[] {".epub", ".cbz"};
					for (String ext : exts) {
						if (title.toLowerCase().endsWith(ext)) {
							title = title.substring(0,
									title.length() - ext.length());
						}
					}
					title = URLDecoder.decode(title, "UTF-8").trim();
				}

				meta = new MetaData();
				meta.setLang("en");
				meta.setTags(Arrays.asList("[no_info]"));
				meta.setSource(getType().getSourceName());
				meta.setUuid(url);
				meta.setUrl(url);
				meta.setTitle(title);
				meta.setAuthor(author);
				meta.setImageDocument(isImagesDocumentByDefault());
				
				InfoReader.completeMeta(tmp, meta);
			}

			if (meta.getCover() == null) {
				if (cover != null) {
					meta.setCover(cover);
				} else {
					meta.setCover(InfoReader.getCoverByName(
							getSourceFileOriginal().toURI().toURL()));
				}
			}
		} finally {
			if (zipIn != null) {
				zipIn.close();
			}
			if (in != null) {
				in.close();
			}
		}

		return null;
	}

	@Override
	protected void close() {
		if (tmpDir != null) {
			IOUtils.deltree(tmpDir);
		}

		tmpDir = null;

		super.close();
	}

	protected String getDataPrefix() {
		return "DATA/";
	}

	protected boolean requireInfo() {
		return true;
	}

	protected boolean getCover() {
		return true;
	}

	protected boolean isImagesDocumentByDefault() {
		return false;
	}
}
Commit	Line	Data
	1	package be.nikiroo.fanfix.supported;
	2
	3	import java.io.File;
	4	import java.io.IOException;
	5	import java.io.InputStream;
	6	import java.net.URISyntaxException;
	7	import java.net.URL;
	8	import java.net.URLDecoder;
	9	import java.util.ArrayList;
	10	import java.util.Arrays;
	11	import java.util.Collections;
	12	import java.util.zip.ZipEntry;
	13	import java.util.zip.ZipInputStream;
	14
	15	import org.jsoup.nodes.Document;
	16
	17	import be.nikiroo.fanfix.Instance;
	18	import be.nikiroo.fanfix.data.MetaData;
	19	import be.nikiroo.utils.IOUtils;
	20	import be.nikiroo.utils.Image;
	21	import be.nikiroo.utils.StringUtils;
	22	import be.nikiroo.utils.streams.MarkableFileInputStream;
	23
	24	/**
	25	* Support class for EPUB files created with this program (as we need some
	26	* metadata available in those we create).
	27	*
	28	* @author niki
	29	*/
	30	class Epub extends InfoText {
	31	private MetaData meta;
	32	private File tmpDir;
	33	private String desc;
	34
	35	private URL fakeSource;
	36	private InputStream fakeIn;
	37
	38	public File getSourceFileOriginal() {
	39	return super.getSourceFile();
	40	}
	41
	42	@Override
	43	protected File getSourceFile() {
	44	try {
	45	return new File(fakeSource.toURI());
	46	} catch (URISyntaxException e) {
	47	Instance.getInstance().getTraceHandler().error(new IOException(
	48	"Cannot get the source file from the info-text URL", e));
	49	}
	50
	51	return null;
	52	}
	53
	54	@Override
	55	protected InputStream getInput() {
	56	if (fakeIn != null) {
	57	try {
	58	fakeIn.reset();
	59	} catch (IOException e) {
	60	Instance.getInstance().getTraceHandler().error(new IOException(
	61	"Cannot reset the Epub Text stream", e));
	62	}
	63
	64	return fakeIn;
	65	}
	66
	67	return null;
	68	}
	69
	70	@Override
	71	protected boolean supports(URL url) {
	72	return url.getPath().toLowerCase().endsWith(".epub");
	73	}
	74
	75	@Override
	76	protected MetaData getMeta() throws IOException {
	77	return meta;
	78	}
	79
	80	@Override
	81	protected Document loadDocument(URL source) throws IOException {
	82	super.loadDocument(source); // prepares super.getSourceFile() and
	83	// super.getInput()
	84
	85	InputStream in = super.getInput();
	86	ZipInputStream zipIn = null;
	87	try {
	88	zipIn = new ZipInputStream(in);
	89	tmpDir = Instance.getInstance().getTempFiles()
	90	.createTempDir("fanfic-reader-parser");
	91	File tmp = new File(tmpDir, "file.txt");
	92	File tmpInfo = new File(tmpDir, "file.info");
	93
	94	fakeSource = tmp.toURI().toURL();
	95	Image cover = null;
	96
	97	String url;
	98	try {
	99	url = getSource().toURI().toURL().toString();
	100	} catch (URISyntaxException e1) {
	101	url = getSource().toString();
	102	}
	103	String title = null;
	104	String author = null;
	105
	106	for (ZipEntry entry = zipIn
	107	.getNextEntry(); entry != null; entry = zipIn
	108	.getNextEntry()) {
	109	if (!entry.isDirectory()
	110	&& entry.getName().startsWith(getDataPrefix())) {
	111	String entryLName = entry.getName().toLowerCase();
	112	entryLName = entryLName.substring(getDataPrefix().length());
	113
	114	boolean imageEntry = false;
	115	for (String ext : bsImages.getImageExt(false)) {
	116	if (entryLName.endsWith(ext)) {
	117	imageEntry = true;
	118	}
	119	}
	120
	121	if (entryLName.equals("version")) {
	122	// Nothing to do for now ("first"
	123	// version is 3.0)
	124	} else if (entryLName.endsWith(".info")) {
	125	// Info file
	126	IOUtils.write(zipIn, tmpInfo);
	127	} else if (imageEntry) {
	128	// Cover
	129	if (getCover() && cover == null) {
	130	try {
	131	Image img = new Image(zipIn);
	132	if (img.getSize() == 0) {
	133	img.close();
	134	throw new IOException(
	135	"Empty image not accepted");
	136	}
	137	cover = img;
	138	} catch (Exception e) {
	139	Instance.getInstance().getTraceHandler()
	140	.error(e);
	141	}
	142	}
	143	} else if (entryLName.equals("url")) {
	144	String[] descArray = StringUtils
	145	.unhtml(IOUtils.readSmallStream(zipIn)).trim()
	146	.split("\n");
	147	if (descArray.length > 0) {
	148	url = descArray[0].trim();
	149	}
	150	} else if (entryLName.endsWith(".desc")) {
	151	// // For old files
	152	// if (this.desc != null) {
	153	// this.desc = IOUtils.readSmallStream(zipIn).trim();
	154	// }
	155	} else if (entryLName.equals("summary")) {
	156	String[] descArray = StringUtils
	157	.unhtml(IOUtils.readSmallStream(zipIn)).trim()
	158	.split("\n");
	159	int skip = 0;
	160	if (descArray.length > 1) {
	161	title = descArray[0].trim();
	162	skip = 1;
	163	if (descArray.length > 2
	164	&& descArray[1].startsWith("©")) {
	165	author = descArray[1].substring(1).trim();
	166	skip = 2;
	167	}
	168	}
	169	// this.desc = "";
	170	// for (int i = skip; i < descArray.length; i++) {
	171	// this.desc += descArray[i].trim() + "\n";
	172	// }
	173	//
	174	// this.desc = this.desc.trim();
	175	} else {
	176	// Hopefully the data file
	177	IOUtils.write(zipIn, tmp);
	178	}
	179	}
	180	}
	181
	182	if (requireInfo() && !tmp.exists()) {
	183	throw new IOException(
	184	"file not supported (maybe not created with this program or corrupt)");
	185	}
	186
	187	if (tmp.exists()) {
	188	this.fakeIn = new MarkableFileInputStream(tmp);
	189	}
	190
	191	if (tmpInfo.exists()) {
	192	meta = InfoReader.readMeta(tmpInfo, true);
	193	tmpInfo.delete();
	194	} else {
	195	if (title == null || title.isEmpty()) {
	196	title = getSourceFileOriginal().getName();
	197	String exts[] = new String[] {".epub", ".cbz"};
	198	for (String ext : exts) {
	199	if (title.toLowerCase().endsWith(ext)) {
	200	title = title.substring(0,
	201	title.length() - ext.length());
	202	}
	203	}
	204	title = URLDecoder.decode(title, "UTF-8").trim();
	205	}
	206
	207	meta = new MetaData();
	208	meta.setLang("en");
	209	meta.setTags(Arrays.asList("[no_info]"));
	210	meta.setSource(getType().getSourceName());
	211	meta.setUuid(url);
	212	meta.setUrl(url);
	213	meta.setTitle(title);
	214	meta.setAuthor(author);
	215	meta.setImageDocument(isImagesDocumentByDefault());
	216
	217	InfoReader.completeMeta(tmp, meta);
	218	}
	219
	220	if (meta.getCover() == null) {
	221	if (cover != null) {
	222	meta.setCover(cover);
	223	} else {
	224	meta.setCover(InfoReader.getCoverByName(
	225	getSourceFileOriginal().toURI().toURL()));
	226	}
	227	}
	228	} finally {
	229	if (zipIn != null) {
	230	zipIn.close();
	231	}
	232	if (in != null) {
	233	in.close();
	234	}
	235	}
	236
	237	return null;
	238	}
	239
	240	@Override
	241	protected void close() {
	242	if (tmpDir != null) {
	243	IOUtils.deltree(tmpDir);
	244	}
	245
	246	tmpDir = null;
	247
	248	super.close();
	249	}
	250
	251	protected String getDataPrefix() {
	252	return "DATA/";
	253	}
	254
	255	protected boolean requireInfo() {
	256	return true;
	257	}
	258
	259	protected boolean getCover() {
	260	return true;
	261	}
	262
	263	protected boolean isImagesDocumentByDefault() {
	264	return false;
	265	}
	266	}