fix for old files
[nikiroo-utils.git] / supported / Epub.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.File;
4 import java.io.IOException;
5 import java.io.InputStream;
6 import java.net.URISyntaxException;
7 import java.net.URL;
8 import java.net.URLDecoder;
9 import java.util.ArrayList;
10 import java.util.zip.ZipEntry;
11 import java.util.zip.ZipInputStream;
12
13 import org.jsoup.nodes.Document;
14
15 import be.nikiroo.fanfix.Instance;
16 import be.nikiroo.fanfix.data.MetaData;
17 import be.nikiroo.utils.IOUtils;
18 import be.nikiroo.utils.Image;
19 import be.nikiroo.utils.StringUtils;
20 import be.nikiroo.utils.streams.MarkableFileInputStream;
21
22 /**
23 * Support class for EPUB files created with this program (as we need some
24 * metadata available in those we create).
25 *
26 * @author niki
27 */
28 class Epub extends InfoText {
29 private MetaData meta;
30 private File tmpDir;
31 private String desc;
32
33 private URL fakeSource;
34 private InputStream fakeIn;
35
36 public File getSourceFileOriginal() {
37 return super.getSourceFile();
38 }
39
40 @Override
41 protected File getSourceFile() {
42 try {
43 return new File(fakeSource.toURI());
44 } catch (URISyntaxException e) {
45 Instance.getInstance().getTraceHandler().error(new IOException(
46 "Cannot get the source file from the info-text URL", e));
47 }
48
49 return null;
50 }
51
52 @Override
53 protected InputStream getInput() {
54 if (fakeIn != null) {
55 try {
56 fakeIn.reset();
57 } catch (IOException e) {
58 Instance.getInstance().getTraceHandler().error(new IOException(
59 "Cannot reset the Epub Text stream", e));
60 }
61
62 return fakeIn;
63 }
64
65 return null;
66 }
67
68 @Override
69 protected boolean supports(URL url) {
70 return url.getPath().toLowerCase().endsWith(".epub");
71 }
72
73 @Override
74 protected MetaData getMeta() throws IOException {
75 return meta;
76 }
77
78 @Override
79 protected Document loadDocument(URL source) throws IOException {
80 super.loadDocument(source); // prepares super.getSourceFile() and
81 // super.getInput()
82
83 InputStream in = super.getInput();
84 ZipInputStream zipIn = null;
85 try {
86 zipIn = new ZipInputStream(in);
87 tmpDir = Instance.getInstance().getTempFiles()
88 .createTempDir("fanfic-reader-parser");
89 File tmp = new File(tmpDir, "file.txt");
90 File tmpInfo = new File(tmpDir, "file.info");
91
92 fakeSource = tmp.toURI().toURL();
93 Image cover = null;
94
95 String url;
96 try {
97 url = getSource().toURI().toURL().toString();
98 } catch (URISyntaxException e1) {
99 url = getSource().toString();
100 }
101 String title = null;
102 String author = null;
103
104 for (ZipEntry entry = zipIn
105 .getNextEntry(); entry != null; entry = zipIn
106 .getNextEntry()) {
107 if (!entry.isDirectory()
108 && entry.getName().startsWith(getDataPrefix())) {
109 String entryLName = entry.getName().toLowerCase();
110
111 boolean imageEntry = false;
112 for (String ext : bsImages.getImageExt(false)) {
113 if (entryLName.endsWith(ext)) {
114 imageEntry = true;
115 }
116 }
117
118 if (entry.getName().equals(getDataPrefix() + "version")) {
119 // Nothing to do for now ("first"
120 // version is 3.0)
121 } else if (entryLName.endsWith(".info")) {
122 // Info file
123 IOUtils.write(zipIn, tmpInfo);
124 } else if (imageEntry) {
125 // Cover
126 if (getCover()) {
127 try {
128 cover = new Image(zipIn);
129 } catch (Exception e) {
130 Instance.getInstance().getTraceHandler()
131 .error(e);
132 }
133 }
134 } else if (entry.getName()
135 .equals(getDataPrefix() + "URL")) {
136 String[] descArray = StringUtils
137 .unhtml(IOUtils.readSmallStream(zipIn)).trim()
138 .split("\n");
139 if (descArray.length > 0) {
140 url = descArray[0].trim();
141 }
142 } else if (entry.getName().endsWith(".desc")) {
143 // // For old files
144 // if (this.desc != null) {
145 // this.desc = IOUtils.readSmallStream(zipIn).trim();
146 // }
147 } else if (entry.getName()
148 .equals(getDataPrefix() + "SUMMARY")) {
149 String[] descArray = StringUtils
150 .unhtml(IOUtils.readSmallStream(zipIn)).trim()
151 .split("\n");
152 int skip = 0;
153 if (descArray.length > 1) {
154 title = descArray[0].trim();
155 skip = 1;
156 if (descArray.length > 2
157 && descArray[1].startsWith("©")) {
158 author = descArray[1].substring(1).trim();
159 skip = 2;
160 }
161 }
162 // this.desc = "";
163 // for (int i = skip; i < descArray.length; i++) {
164 // this.desc += descArray[i].trim() + "\n";
165 // }
166 //
167 // this.desc = this.desc.trim();
168 } else {
169 // Hopefully the data file
170 IOUtils.write(zipIn, tmp);
171 }
172 }
173 }
174
175 if (requireInfo() && (!tmp.exists() || !tmpInfo.exists())) {
176 throw new IOException(
177 "file not supported (maybe not created with this program or corrupt)");
178 }
179
180 if (tmp.exists()) {
181 this.fakeIn = new MarkableFileInputStream(tmp);
182 }
183
184 if (tmpInfo.exists()) {
185 meta = InfoReader.readMeta(tmpInfo, true);
186 tmpInfo.delete();
187 } else {
188 if (title == null || title.isEmpty()) {
189 title = getSourceFileOriginal().getName();
190 if (title.toLowerCase().endsWith(".cbz")) {
191 title = title.substring(0, title.length() - 4);
192 }
193 title = URLDecoder.decode(title, "UTF-8").trim();
194 }
195
196 meta = new MetaData();
197 meta.setLang("en");
198 meta.setTags(new ArrayList<String>());
199 meta.setSource(getType().getSourceName());
200 meta.setUuid(url);
201 meta.setUrl(url);
202 meta.setTitle(title);
203 meta.setAuthor(author);
204 meta.setImageDocument(isImagesDocumentByDefault());
205 }
206
207 if (meta.getCover() == null) {
208 if (cover != null) {
209 meta.setCover(cover);
210 } else {
211 meta.setCover(InfoReader.getCoverByName(
212 getSourceFileOriginal().toURI().toURL()));
213 }
214 }
215 } finally {
216 if (zipIn != null) {
217 zipIn.close();
218 }
219 if (in != null) {
220 in.close();
221 }
222 }
223
224 return null;
225 }
226
227 @Override
228 protected void close() {
229 if (tmpDir != null) {
230 IOUtils.deltree(tmpDir);
231 }
232
233 tmpDir = null;
234
235 super.close();
236 }
237
238 protected String getDataPrefix() {
239 return "DATA/";
240 }
241
242 protected boolean requireInfo() {
243 return true;
244 }
245
246 protected boolean getCover() {
247 return true;
248 }
249
250 protected boolean isImagesDocumentByDefault() {
251 return false;
252 }
253 }