fix for old files
[nikiroo-utils.git] / supported / Epub.java
CommitLineData
08fe2e33
NR
1package be.nikiroo.fanfix.supported;
2
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Locale;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;

import org.jsoup.nodes.Document;

import be.nikiroo.fanfix.Instance;
import be.nikiroo.fanfix.data.MetaData;
import be.nikiroo.utils.IOUtils;
import be.nikiroo.utils.Image;
import be.nikiroo.utils.StringUtils;
import be.nikiroo.utils.streams.MarkableFileInputStream;
08fe2e33
NR
21
22/**
23 * Support class for EPUB files created with this program (as we need some
24 * metadata available in those we create).
25 *
26 * @author niki
27 */
68686a37 28class Epub extends InfoText {
7445f856 29 private MetaData meta;
2aac79c7 30 private File tmpDir;
b7afbe42 31 private String desc;
08fe2e33 32
68686a37
NR
33 private URL fakeSource;
34 private InputStream fakeIn;
08fe2e33 35
7445f856 36 public File getSourceFileOriginal() {
298d405a 37 return super.getSourceFile();
08fe2e33
NR
38 }
39
40 @Override
7445f856
NR
41 protected File getSourceFile() {
42 try {
43 return new File(fakeSource.toURI());
44 } catch (URISyntaxException e) {
076caecc
NR
45 Instance.getInstance().getTraceHandler().error(new IOException(
46 "Cannot get the source file from the info-text URL", e));
08fe2e33
NR
47 }
48
49 return null;
50 }
51
52 @Override
7445f856 53 protected InputStream getInput() {
298d405a
NR
54 if (fakeIn != null) {
55 try {
56 fakeIn.reset();
57 } catch (IOException e) {
076caecc
NR
58 Instance.getInstance().getTraceHandler().error(new IOException(
59 "Cannot reset the Epub Text stream", e));
298d405a
NR
60 }
61
62 return fakeIn;
63 }
64
65 return null;
08fe2e33
NR
66 }
67
68 @Override
7445f856
NR
69 protected boolean supports(URL url) {
70 return url.getPath().toLowerCase().endsWith(".epub");
71 }
08fe2e33 72
7445f856
NR
73 @Override
74 protected MetaData getMeta() throws IOException {
75 return meta;
08fe2e33
NR
76 }
77
78 @Override
7445f856
NR
79 protected Document loadDocument(URL source) throws IOException {
80 super.loadDocument(source); // prepares super.getSourceFile() and
81 // super.getInput()
82
83 InputStream in = super.getInput();
84 ZipInputStream zipIn = null;
85 try {
86 zipIn = new ZipInputStream(in);
076caecc
NR
87 tmpDir = Instance.getInstance().getTempFiles()
88 .createTempDir("fanfic-reader-parser");
7445f856
NR
89 File tmp = new File(tmpDir, "file.txt");
90 File tmpInfo = new File(tmpDir, "file.info");
91
92 fakeSource = tmp.toURI().toURL();
93 Image cover = null;
94
95 String url;
96 try {
97 url = getSource().toURI().toURL().toString();
98 } catch (URISyntaxException e1) {
99 url = getSource().toString();
100 }
101 String title = null;
102 String author = null;
103
076caecc
NR
104 for (ZipEntry entry = zipIn
105 .getNextEntry(); entry != null; entry = zipIn
106 .getNextEntry()) {
7445f856
NR
107 if (!entry.isDirectory()
108 && entry.getName().startsWith(getDataPrefix())) {
109 String entryLName = entry.getName().toLowerCase();
110
111 boolean imageEntry = false;
8d59ce07 112 for (String ext : bsImages.getImageExt(false)) {
7445f856
NR
113 if (entryLName.endsWith(ext)) {
114 imageEntry = true;
115 }
08fe2e33 116 }
08fe2e33 117
7445f856
NR
118 if (entry.getName().equals(getDataPrefix() + "version")) {
119 // Nothing to do for now ("first"
120 // version is 3.0)
121 } else if (entryLName.endsWith(".info")) {
122 // Info file
123 IOUtils.write(zipIn, tmpInfo);
124 } else if (imageEntry) {
125 // Cover
126 if (getCover()) {
127 try {
128 cover = new Image(zipIn);
129 } catch (Exception e) {
076caecc
NR
130 Instance.getInstance().getTraceHandler()
131 .error(e);
7445f856 132 }
08fe2e33 133 }
076caecc
NR
134 } else if (entry.getName()
135 .equals(getDataPrefix() + "URL")) {
7445f856
NR
136 String[] descArray = StringUtils
137 .unhtml(IOUtils.readSmallStream(zipIn)).trim()
138 .split("\n");
139 if (descArray.length > 0) {
140 url = descArray[0].trim();
141 }
076caecc
NR
142 } else if (entry.getName().endsWith(".desc")) {
143 // // For old files
144 // if (this.desc != null) {
145 // this.desc = IOUtils.readSmallStream(zipIn).trim();
146 // }
147 } else if (entry.getName()
148 .equals(getDataPrefix() + "SUMMARY")) {
7445f856
NR
149 String[] descArray = StringUtils
150 .unhtml(IOUtils.readSmallStream(zipIn)).trim()
151 .split("\n");
152 int skip = 0;
153 if (descArray.length > 1) {
154 title = descArray[0].trim();
155 skip = 1;
156 if (descArray.length > 2
157 && descArray[1].startsWith("©")) {
158 author = descArray[1].substring(1).trim();
159 skip = 2;
160 }
161 }
076caecc
NR
162 // this.desc = "";
163 // for (int i = skip; i < descArray.length; i++) {
164 // this.desc += descArray[i].trim() + "\n";
165 // }
166 //
167 // this.desc = this.desc.trim();
7445f856
NR
168 } else {
169 // Hopefully the data file
170 IOUtils.write(zipIn, tmp);
171 }
08fe2e33
NR
172 }
173 }
08fe2e33 174
7445f856
NR
175 if (requireInfo() && (!tmp.exists() || !tmpInfo.exists())) {
176 throw new IOException(
177 "file not supported (maybe not created with this program or corrupt)");
178 }
68686a37 179
7445f856 180 if (tmp.exists()) {
67837328 181 this.fakeIn = new MarkableFileInputStream(tmp);
68686a37 182 }
7445f856
NR
183
184 if (tmpInfo.exists()) {
185 meta = InfoReader.readMeta(tmpInfo, true);
7445f856
NR
186 tmpInfo.delete();
187 } else {
188 if (title == null || title.isEmpty()) {
189 title = getSourceFileOriginal().getName();
190 if (title.toLowerCase().endsWith(".cbz")) {
191 title = title.substring(0, title.length() - 4);
192 }
193 title = URLDecoder.decode(title, "UTF-8").trim();
778d8d85 194 }
778d8d85 195
7445f856
NR
196 meta = new MetaData();
197 meta.setLang("en");
198 meta.setTags(new ArrayList<String>());
727108fe 199 meta.setSource(getType().getSourceName());
7445f856
NR
200 meta.setUuid(url);
201 meta.setUrl(url);
202 meta.setTitle(title);
203 meta.setAuthor(author);
204 meta.setImageDocument(isImagesDocumentByDefault());
205 }
bb7021f2
NR
206
207 if (meta.getCover() == null) {
208 if (cover != null) {
209 meta.setCover(cover);
210 } else {
076caecc
NR
211 meta.setCover(InfoReader.getCoverByName(
212 getSourceFileOriginal().toURI().toURL()));
bb7021f2
NR
213 }
214 }
7445f856
NR
215 } finally {
216 if (zipIn != null) {
217 zipIn.close();
218 }
219 if (in != null) {
220 in.close();
221 }
08fe2e33 222 }
7445f856
NR
223
224 return null;
08fe2e33
NR
225 }
226
227 @Override
0ffa4754 228 protected void close() {
2aac79c7
NR
229 if (tmpDir != null) {
230 IOUtils.deltree(tmpDir);
08fe2e33
NR
231 }
232
2aac79c7 233 tmpDir = null;
dea63313 234
68686a37 235 super.close();
08fe2e33
NR
236 }
237
238 protected String getDataPrefix() {
239 return "DATA/";
240 }
241
242 protected boolean requireInfo() {
243 return true;
244 }
245
246 protected boolean getCover() {
247 return true;
248 }
e4fa48a0
NR
249
250 protected boolean isImagesDocumentByDefault() {
251 return false;
252 }
08fe2e33 253}