cbz/epub read support: be more permissive
[nikiroo-utils.git] / supported / InfoReader.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.File;
4 import java.io.FileNotFoundException;
5 import java.io.IOException;
6 import java.io.InputStream;
7 import java.net.URL;
8 import java.util.ArrayList;
9 import java.util.List;
10 import java.util.Scanner;
11
12 import be.nikiroo.fanfix.Instance;
13 import be.nikiroo.fanfix.bundles.Config;
14 import be.nikiroo.fanfix.data.MetaData;
15 import be.nikiroo.utils.Image;
16 import be.nikiroo.utils.streams.MarkableFileInputStream;
17
18 // not complete: no "description" tag
19 public class InfoReader {
20 static protected BasicSupportHelper bsHelper = new BasicSupportHelper();
21 // static protected BasicSupportImages bsImages = new BasicSupportImages();
22 // static protected BasicSupportPara bsPara = new BasicSupportPara(new
23 // BasicSupportHelper(), new BasicSupportImages());
24
25 public static MetaData readMeta(File infoFile, boolean withCover)
26 throws IOException {
27 if (infoFile == null) {
28 throw new IOException("File is null");
29 }
30
31 if (infoFile.exists()) {
32 InputStream in = new MarkableFileInputStream(infoFile);
33 try {
34 MetaData meta = createMeta(infoFile.toURI().toURL(), in,
35 withCover);
36
37 // Some old .info files were using UUID for URL...
38 if (!hasIt(meta.getUrl()) && meta.getUuid() != null
39 && (meta.getUuid().startsWith("http://")
40 || meta.getUuid().startsWith("https://"))) {
41 meta.setUrl(meta.getUuid());
42 }
43
44 // Some old .info files don't have those now required fields...
45 // So we check if we can find the info in another way (many
46 // formats have a copy of the original text file)
47 if (!hasIt(meta.getTitle(), meta.getAuthor(), meta.getDate(),
48 meta.getUrl())) {
49
50 // TODO: not nice, would be better to do it properly...
51 String base = infoFile.getPath();
52 if (base.endsWith(".info")) {
53 base = base.substring(0,
54 base.length() - ".info".length());
55 }
56 File textFile = new File(base);
57 if (!textFile.exists()) {
58 textFile = new File(base + ".txt");
59 }
60 if (!textFile.exists()) {
61 textFile = new File(base + ".text");
62 }
63
64 completeMeta(textFile, meta);
65 //
66 }
67
68 return meta;
69 } finally {
70 in.close();
71 }
72 }
73
74 throw new FileNotFoundException(
75 "File given as argument does not exists: "
76 + infoFile.getAbsolutePath());
77 }
78
79 /**
80 * Complete the given {@link MetaData} with the original text file if needed
81 * and possible.
82 *
83 * @param textFile
84 * the original text file
85 * @param meta
86 * the {@link MetaData} to complete if needed and possible
87 *
88 * @throws IOException
89 * in case of I/O errors
90 */
91 static public void completeMeta(File textFile,
92 MetaData meta) throws IOException {
93 if (textFile != null && textFile.exists()) {
94 final URL source = textFile.toURI().toURL();
95 final MetaData[] superMetaA = new MetaData[1];
96 @SuppressWarnings("unused")
97 Text unused = new Text() {
98 private boolean loaded = loadDocument();
99
100 @Override
101 public SupportType getType() {
102 return SupportType.TEXT;
103 }
104
105 protected boolean loadDocument() throws IOException {
106 loadDocument(source);
107 superMetaA[0] = getMeta();
108 return true;
109 }
110
111 @Override
112 protected Image getCover(File sourceFile) {
113 return null;
114 }
115 };
116
117 MetaData superMeta = superMetaA[0];
118 if (!hasIt(meta.getTitle())) {
119 meta.setTitle(superMeta.getTitle());
120 }
121 if (!hasIt(meta.getAuthor())) {
122 meta.setAuthor(superMeta.getAuthor());
123 }
124 if (!hasIt(meta.getDate())) {
125 meta.setDate(superMeta.getDate());
126 }
127 if (!hasIt(meta.getUrl())) {
128 meta.setUrl(superMeta.getUrl());
129 }
130 }
131 }
132
133 /**
134 * Check if we have non-empty values for all the given {@link String}s.
135 *
136 * @param values
137 * the values to check
138 *
139 * @return TRUE if none of them was NULL or empty
140 */
141 static private boolean hasIt(String... values) {
142 for (String value : values) {
143 if (value == null || value.trim().isEmpty()) {
144 return false;
145 }
146 }
147
148 return true;
149 }
150
151 private static MetaData createMeta(URL sourceInfoFile, InputStream in,
152 boolean withCover) throws IOException {
153 MetaData meta = new MetaData();
154
155 meta.setTitle(getInfoTag(in, "TITLE"));
156 meta.setAuthor(getInfoTag(in, "AUTHOR"));
157 meta.setDate(bsHelper.formatDate(getInfoTag(in, "DATE")));
158 meta.setTags(getInfoTagList(in, "TAGS", ","));
159 meta.setSource(getInfoTag(in, "SOURCE"));
160 meta.setUrl(getInfoTag(in, "URL"));
161 meta.setPublisher(getInfoTag(in, "PUBLISHER"));
162 meta.setUuid(getInfoTag(in, "UUID"));
163 meta.setLuid(getInfoTag(in, "LUID"));
164 meta.setLang(getInfoTag(in, "LANG"));
165 meta.setSubject(getInfoTag(in, "SUBJECT"));
166 meta.setType(getInfoTag(in, "TYPE"));
167 meta.setImageDocument(getInfoTagBoolean(in, "IMAGES_DOCUMENT", false));
168 if (withCover) {
169 String infoTag = getInfoTag(in, "COVER");
170 if (infoTag != null && !infoTag.trim().isEmpty()) {
171 meta.setCover(bsHelper.getImage(null, sourceInfoFile, infoTag));
172 }
173 if (meta.getCover() == null) {
174 // Second chance: try to check for a cover next to the info file
175 meta.setCover(getCoverByName(sourceInfoFile));
176 }
177 }
178 try {
179 meta.setWords(Long.parseLong(getInfoTag(in, "WORDCOUNT")));
180 } catch (NumberFormatException e) {
181 meta.setWords(0);
182 }
183 meta.setCreationDate(
184 bsHelper.formatDate(getInfoTag(in, "CREATION_DATE")));
185 meta.setFakeCover(Boolean.parseBoolean(getInfoTag(in, "FAKE_COVER")));
186
187 if (withCover && meta.getCover() == null) {
188 meta.setCover(bsHelper.getDefaultCover(meta.getSubject()));
189 }
190
191 return meta;
192 }
193
194 /**
195 * Return the cover image if it is next to the source file.
196 *
197 * @param sourceInfoFile
198 * the source file
199 *
200 * @return the cover if present, NULL if not
201 */
202 public static Image getCoverByName(URL sourceInfoFile) {
203 Image cover = null;
204
205 File basefile = new File(sourceInfoFile.getFile());
206
207 String ext = "." + Instance.getInstance().getConfig()
208 .getString(Config.FILE_FORMAT_IMAGE_FORMAT_COVER).toLowerCase();
209
210 // Without removing ext
211 cover = bsHelper.getImage(null, sourceInfoFile,
212 basefile.getAbsolutePath() + ext);
213
214 // Try without ext
215 String name = basefile.getName();
216 int pos = name.lastIndexOf(".");
217 if (cover == null && pos > 0) {
218 name = name.substring(0, pos);
219 basefile = new File(basefile.getParent(), name);
220
221 cover = bsHelper.getImage(null, sourceInfoFile,
222 basefile.getAbsolutePath() + ext);
223 }
224
225 return cover;
226 }
227
228 private static boolean getInfoTagBoolean(InputStream in, String key,
229 boolean def) throws IOException {
230 Boolean value = getInfoTagBoolean(in, key);
231 return value == null ? def : value;
232 }
233
234 private static Boolean getInfoTagBoolean(InputStream in, String key)
235 throws IOException {
236 String value = getInfoTag(in, key);
237 if (value != null && !value.trim().isEmpty()) {
238 value = value.toLowerCase().trim();
239 return value.equals("1") || value.equals("on")
240 || value.equals("true") || value.equals("yes");
241 }
242
243 return null;
244 }
245
246 private static List<String> getInfoTagList(InputStream in, String key,
247 String separator) throws IOException {
248 List<String> list = new ArrayList<String>();
249 String tt = getInfoTag(in, key);
250 if (tt != null) {
251 for (String tag : tt.split(separator)) {
252 list.add(tag.trim());
253 }
254 }
255
256 return list;
257 }
258
259 /**
260 * Return the value of the given tag in the <tt>.info</tt> file if present.
261 *
262 * @param key
263 * the tag key
264 *
265 * @return the value or NULL
266 *
267 * @throws IOException
268 * in case of I/O error
269 */
270 private static String getInfoTag(InputStream in, String key)
271 throws IOException {
272 key = "^" + key + "=";
273
274 if (in != null) {
275 in.reset();
276 String value = getLine(in, key, 0);
277 if (value != null && !value.isEmpty()) {
278 value = value.trim().substring(key.length() - 1).trim();
279 if (value.length() > 1 && //
280 (value.startsWith("'") && value.endsWith("'")
281 || value.startsWith("\"")
282 && value.endsWith("\""))) {
283 value = value.substring(1, value.length() - 1).trim();
284 }
285
286 // Some old files ended up with TITLE="'xxxxx'"
287 if ("^TITLE=".equals(key)) {
288 if (value.startsWith("'") && value.endsWith("'")
289 && value.length() > 1) {
290 value = value.substring(1, value.length() - 1).trim();
291 }
292 }
293
294 return value;
295 }
296 }
297
298 return null;
299 }
300
301 /**
302 * Return the first line from the given input which correspond to the given
303 * selectors.
304 *
305 * @param in
306 * the input
307 * @param needle
308 * a string that must be found inside the target line (also
309 * supports "^" at start to say "only if it starts with" the
310 * needle)
311 * @param relativeLine
312 * the line to return based upon the target line position (-1 =
313 * the line before, 0 = the target line...)
314 *
315 * @return the line
316 */
317 static private String getLine(InputStream in, String needle,
318 int relativeLine) {
319 return getLine(in, needle, relativeLine, true);
320 }
321
322 /**
323 * Return a line from the given input which correspond to the given
324 * selectors.
325 *
326 * @param in
327 * the input
328 * @param needle
329 * a string that must be found inside the target line (also
330 * supports "^" at start to say "only if it starts with" the
331 * needle)
332 * @param relativeLine
333 * the line to return based upon the target line position (-1 =
334 * the line before, 0 = the target line...)
335 * @param first
336 * takes the first result (as opposed to the last one, which will
337 * also always spend the input)
338 *
339 * @return the line
340 */
341 static private String getLine(InputStream in, String needle,
342 int relativeLine, boolean first) {
343 String rep = null;
344
345 List<String> lines = new ArrayList<String>();
346 @SuppressWarnings("resource")
347 Scanner scan = new Scanner(in, "UTF-8");
348 int index = -1;
349 scan.useDelimiter("\\n");
350 while (scan.hasNext()) {
351 lines.add(scan.next());
352
353 if (index == -1) {
354 if (needle.startsWith("^")) {
355 if (lines.get(lines.size() - 1)
356 .startsWith(needle.substring(1))) {
357 index = lines.size() - 1;
358 }
359
360 } else {
361 if (lines.get(lines.size() - 1).contains(needle)) {
362 index = lines.size() - 1;
363 }
364 }
365 }
366
367 if (index >= 0 && index + relativeLine < lines.size()) {
368 rep = lines.get(index + relativeLine);
369 if (first) {
370 break;
371 }
372 }
373 }
374
375 return rep;
376 }
377 }