cbz/epub read support: be more permissive
[nikiroo-utils.git] / supported / Epub.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.File;
4 import java.io.IOException;
5 import java.io.InputStream;
6 import java.net.URISyntaxException;
7 import java.net.URL;
8 import java.net.URLDecoder;
9 import java.util.ArrayList;
10 import java.util.Arrays;
11 import java.util.Collections;
12 import java.util.zip.ZipEntry;
13 import java.util.zip.ZipInputStream;
14
15 import org.jsoup.nodes.Document;
16
17 import be.nikiroo.fanfix.Instance;
18 import be.nikiroo.fanfix.data.MetaData;
19 import be.nikiroo.utils.IOUtils;
20 import be.nikiroo.utils.Image;
21 import be.nikiroo.utils.StringUtils;
22 import be.nikiroo.utils.streams.MarkableFileInputStream;
23
24 /**
25 * Support class for EPUB files created with this program (as we need some
26 * metadata available in those we create).
27 *
28 * @author niki
29 */
30 class Epub extends InfoText {
31 private MetaData meta;
32 private File tmpDir;
33 private String desc;
34
35 private URL fakeSource;
36 private InputStream fakeIn;
37
38 public File getSourceFileOriginal() {
39 return super.getSourceFile();
40 }
41
42 @Override
43 protected File getSourceFile() {
44 try {
45 return new File(fakeSource.toURI());
46 } catch (URISyntaxException e) {
47 Instance.getInstance().getTraceHandler().error(new IOException(
48 "Cannot get the source file from the info-text URL", e));
49 }
50
51 return null;
52 }
53
54 @Override
55 protected InputStream getInput() {
56 if (fakeIn != null) {
57 try {
58 fakeIn.reset();
59 } catch (IOException e) {
60 Instance.getInstance().getTraceHandler().error(new IOException(
61 "Cannot reset the Epub Text stream", e));
62 }
63
64 return fakeIn;
65 }
66
67 return null;
68 }
69
70 @Override
71 protected boolean supports(URL url) {
72 return url.getPath().toLowerCase().endsWith(".epub");
73 }
74
75 @Override
76 protected MetaData getMeta() throws IOException {
77 return meta;
78 }
79
80 @Override
81 protected Document loadDocument(URL source) throws IOException {
82 super.loadDocument(source); // prepares super.getSourceFile() and
83 // super.getInput()
84
85 InputStream in = super.getInput();
86 ZipInputStream zipIn = null;
87 try {
88 zipIn = new ZipInputStream(in);
89 tmpDir = Instance.getInstance().getTempFiles()
90 .createTempDir("fanfic-reader-parser");
91 File tmp = new File(tmpDir, "file.txt");
92 File tmpInfo = new File(tmpDir, "file.info");
93
94 fakeSource = tmp.toURI().toURL();
95 Image cover = null;
96
97 String url;
98 try {
99 url = getSource().toURI().toURL().toString();
100 } catch (URISyntaxException e1) {
101 url = getSource().toString();
102 }
103 String title = null;
104 String author = null;
105
106 for (ZipEntry entry = zipIn
107 .getNextEntry(); entry != null; entry = zipIn
108 .getNextEntry()) {
109 if (!entry.isDirectory()
110 && entry.getName().startsWith(getDataPrefix())) {
111 String entryLName = entry.getName().toLowerCase();
112 entryLName = entryLName.substring(getDataPrefix().length());
113
114 boolean imageEntry = false;
115 for (String ext : bsImages.getImageExt(false)) {
116 if (entryLName.endsWith(ext)) {
117 imageEntry = true;
118 }
119 }
120
121 if (entryLName.equals("version")) {
122 // Nothing to do for now ("first"
123 // version is 3.0)
124 } else if (entryLName.endsWith(".info")) {
125 // Info file
126 IOUtils.write(zipIn, tmpInfo);
127 } else if (imageEntry) {
128 // Cover
129 if (getCover() && cover == null) {
130 try {
131 cover = new Image(zipIn);
132 } catch (Exception e) {
133 Instance.getInstance().getTraceHandler()
134 .error(e);
135 }
136 }
137 } else if (entryLName.equals("url")) {
138 String[] descArray = StringUtils
139 .unhtml(IOUtils.readSmallStream(zipIn)).trim()
140 .split("\n");
141 if (descArray.length > 0) {
142 url = descArray[0].trim();
143 }
144 } else if (entryLName.endsWith(".desc")) {
145 // // For old files
146 // if (this.desc != null) {
147 // this.desc = IOUtils.readSmallStream(zipIn).trim();
148 // }
149 } else if (entryLName.equals("summary")) {
150 String[] descArray = StringUtils
151 .unhtml(IOUtils.readSmallStream(zipIn)).trim()
152 .split("\n");
153 int skip = 0;
154 if (descArray.length > 1) {
155 title = descArray[0].trim();
156 skip = 1;
157 if (descArray.length > 2
158 && descArray[1].startsWith("©")) {
159 author = descArray[1].substring(1).trim();
160 skip = 2;
161 }
162 }
163 // this.desc = "";
164 // for (int i = skip; i < descArray.length; i++) {
165 // this.desc += descArray[i].trim() + "\n";
166 // }
167 //
168 // this.desc = this.desc.trim();
169 } else {
170 // Hopefully the data file
171 IOUtils.write(zipIn, tmp);
172 }
173 }
174 }
175
176 if (requireInfo() && !tmp.exists()) {
177 throw new IOException(
178 "file not supported (maybe not created with this program or corrupt)");
179 }
180
181 if (tmp.exists()) {
182 this.fakeIn = new MarkableFileInputStream(tmp);
183 }
184
185 if (tmpInfo.exists()) {
186 meta = InfoReader.readMeta(tmpInfo, true);
187 tmpInfo.delete();
188 } else {
189 if (title == null || title.isEmpty()) {
190 title = getSourceFileOriginal().getName();
191 String exts[] = new String[] {".epub", ".cbz"};
192 for (String ext : exts) {
193 if (title.toLowerCase().endsWith(ext)) {
194 title = title.substring(0,
195 title.length() - ext.length());
196 }
197 }
198 title = URLDecoder.decode(title, "UTF-8").trim();
199 }
200
201 meta = new MetaData();
202 meta.setLang("en");
203 meta.setTags(Arrays.asList("[no_info]"));
204 meta.setSource(getType().getSourceName());
205 meta.setUuid(url);
206 meta.setUrl(url);
207 meta.setTitle(title);
208 meta.setAuthor(author);
209 meta.setImageDocument(isImagesDocumentByDefault());
210
211 InfoReader.completeMeta(tmp, meta);
212 }
213
214 if (meta.getCover() == null) {
215 if (cover != null) {
216 meta.setCover(cover);
217 } else {
218 meta.setCover(InfoReader.getCoverByName(
219 getSourceFileOriginal().toURI().toURL()));
220 }
221 }
222 } finally {
223 if (zipIn != null) {
224 zipIn.close();
225 }
226 if (in != null) {
227 in.close();
228 }
229 }
230
231 return null;
232 }
233
234 @Override
235 protected void close() {
236 if (tmpDir != null) {
237 IOUtils.deltree(tmpDir);
238 }
239
240 tmpDir = null;
241
242 super.close();
243 }
244
245 protected String getDataPrefix() {
246 return "DATA/";
247 }
248
249 protected boolean requireInfo() {
250 return true;
251 }
252
253 protected boolean getCover() {
254 return true;
255 }
256
257 protected boolean isImagesDocumentByDefault() {
258 return false;
259 }
260 }