Merge branch 'master' into subtree
[nikiroo-utils.git] / supported / Epub.java
CommitLineData
08fe2e33
NR
1package be.nikiroo.fanfix.supported;
2
08fe2e33 3import java.io.File;
08fe2e33
NR
4import java.io.IOException;
5import java.io.InputStream;
7445f856 6import java.net.URISyntaxException;
08fe2e33 7import java.net.URL;
778d8d85 8import java.net.URLDecoder;
68686a37 9import java.util.ArrayList;
15da4d0a
NR
10import java.util.Arrays;
11import java.util.Collections;
08fe2e33
NR
12import java.util.zip.ZipEntry;
13import java.util.zip.ZipInputStream;
14
7445f856
NR
15import org.jsoup.nodes.Document;
16
08fe2e33 17import be.nikiroo.fanfix.Instance;
68686a37 18import be.nikiroo.fanfix.data.MetaData;
08fe2e33 19import be.nikiroo.utils.IOUtils;
16a81ef7 20import be.nikiroo.utils.Image;
b7afbe42 21import be.nikiroo.utils.StringUtils;
8d59ce07 22import be.nikiroo.utils.streams.MarkableFileInputStream;
08fe2e33
NR
23
24/**
25 * Support class for EPUB files created with this program (as we need some
26 * metadata available in those we create).
27 *
28 * @author niki
29 */
68686a37 30class Epub extends InfoText {
7445f856 31 private MetaData meta;
2aac79c7 32 private File tmpDir;
b7afbe42 33 private String desc;
08fe2e33 34
68686a37
NR
35 private URL fakeSource;
36 private InputStream fakeIn;
08fe2e33 37
7445f856 38 public File getSourceFileOriginal() {
298d405a 39 return super.getSourceFile();
08fe2e33
NR
40 }
41
42 @Override
7445f856
NR
43 protected File getSourceFile() {
44 try {
45 return new File(fakeSource.toURI());
46 } catch (URISyntaxException e) {
076caecc
NR
47 Instance.getInstance().getTraceHandler().error(new IOException(
48 "Cannot get the source file from the info-text URL", e));
08fe2e33
NR
49 }
50
51 return null;
52 }
53
54 @Override
7445f856 55 protected InputStream getInput() {
298d405a
NR
56 if (fakeIn != null) {
57 try {
58 fakeIn.reset();
59 } catch (IOException e) {
076caecc
NR
60 Instance.getInstance().getTraceHandler().error(new IOException(
61 "Cannot reset the Epub Text stream", e));
298d405a
NR
62 }
63
64 return fakeIn;
65 }
66
67 return null;
08fe2e33
NR
68 }
69
70 @Override
7445f856
NR
71 protected boolean supports(URL url) {
72 return url.getPath().toLowerCase().endsWith(".epub");
73 }
08fe2e33 74
7445f856
NR
75 @Override
76 protected MetaData getMeta() throws IOException {
77 return meta;
08fe2e33
NR
78 }
79
80 @Override
7445f856
NR
81 protected Document loadDocument(URL source) throws IOException {
82 super.loadDocument(source); // prepares super.getSourceFile() and
83 // super.getInput()
84
85 InputStream in = super.getInput();
86 ZipInputStream zipIn = null;
87 try {
88 zipIn = new ZipInputStream(in);
076caecc
NR
89 tmpDir = Instance.getInstance().getTempFiles()
90 .createTempDir("fanfic-reader-parser");
7445f856
NR
91 File tmp = new File(tmpDir, "file.txt");
92 File tmpInfo = new File(tmpDir, "file.info");
93
94 fakeSource = tmp.toURI().toURL();
95 Image cover = null;
96
97 String url;
98 try {
99 url = getSource().toURI().toURL().toString();
100 } catch (URISyntaxException e1) {
101 url = getSource().toString();
102 }
103 String title = null;
104 String author = null;
105
076caecc
NR
106 for (ZipEntry entry = zipIn
107 .getNextEntry(); entry != null; entry = zipIn
108 .getNextEntry()) {
7445f856
NR
109 if (!entry.isDirectory()
110 && entry.getName().startsWith(getDataPrefix())) {
111 String entryLName = entry.getName().toLowerCase();
15da4d0a 112 entryLName = entryLName.substring(getDataPrefix().length());
7445f856
NR
113
114 boolean imageEntry = false;
8d59ce07 115 for (String ext : bsImages.getImageExt(false)) {
7445f856
NR
116 if (entryLName.endsWith(ext)) {
117 imageEntry = true;
118 }
08fe2e33 119 }
08fe2e33 120
15da4d0a 121 if (entryLName.equals("version")) {
7445f856
NR
122 // Nothing to do for now ("first"
123 // version is 3.0)
124 } else if (entryLName.endsWith(".info")) {
125 // Info file
126 IOUtils.write(zipIn, tmpInfo);
127 } else if (imageEntry) {
128 // Cover
15da4d0a 129 if (getCover() && cover == null) {
7445f856 130 try {
002972e9
NR
131 Image img = new Image(zipIn);
132 if (img.getSize() == 0) {
133 img.close();
134 throw new IOException(
135 "Empty image not accepted");
136 }
137 cover = img;
7445f856 138 } catch (Exception e) {
076caecc
NR
139 Instance.getInstance().getTraceHandler()
140 .error(e);
7445f856 141 }
08fe2e33 142 }
15da4d0a 143 } else if (entryLName.equals("url")) {
7445f856
NR
144 String[] descArray = StringUtils
145 .unhtml(IOUtils.readSmallStream(zipIn)).trim()
146 .split("\n");
147 if (descArray.length > 0) {
148 url = descArray[0].trim();
149 }
15da4d0a 150 } else if (entryLName.endsWith(".desc")) {
076caecc
NR
151 // // For old files
152 // if (this.desc != null) {
153 // this.desc = IOUtils.readSmallStream(zipIn).trim();
154 // }
15da4d0a 155 } else if (entryLName.equals("summary")) {
7445f856
NR
156 String[] descArray = StringUtils
157 .unhtml(IOUtils.readSmallStream(zipIn)).trim()
158 .split("\n");
159 int skip = 0;
160 if (descArray.length > 1) {
161 title = descArray[0].trim();
162 skip = 1;
163 if (descArray.length > 2
164 && descArray[1].startsWith("©")) {
165 author = descArray[1].substring(1).trim();
166 skip = 2;
167 }
168 }
076caecc
NR
169 // this.desc = "";
170 // for (int i = skip; i < descArray.length; i++) {
171 // this.desc += descArray[i].trim() + "\n";
172 // }
173 //
174 // this.desc = this.desc.trim();
7445f856
NR
175 } else {
176 // Hopefully the data file
177 IOUtils.write(zipIn, tmp);
178 }
08fe2e33
NR
179 }
180 }
08fe2e33 181
15da4d0a 182 if (requireInfo() && !tmp.exists()) {
7445f856
NR
183 throw new IOException(
184 "file not supported (maybe not created with this program or corrupt)");
185 }
68686a37 186
7445f856 187 if (tmp.exists()) {
67837328 188 this.fakeIn = new MarkableFileInputStream(tmp);
68686a37 189 }
7445f856
NR
190
191 if (tmpInfo.exists()) {
192 meta = InfoReader.readMeta(tmpInfo, true);
7445f856
NR
193 tmpInfo.delete();
194 } else {
195 if (title == null || title.isEmpty()) {
196 title = getSourceFileOriginal().getName();
15da4d0a
NR
197 String exts[] = new String[] {".epub", ".cbz"};
198 for (String ext : exts) {
199 if (title.toLowerCase().endsWith(ext)) {
200 title = title.substring(0,
201 title.length() - ext.length());
202 }
7445f856
NR
203 }
204 title = URLDecoder.decode(title, "UTF-8").trim();
778d8d85 205 }
778d8d85 206
7445f856
NR
207 meta = new MetaData();
208 meta.setLang("en");
15da4d0a 209 meta.setTags(Arrays.asList("[no_info]"));
727108fe 210 meta.setSource(getType().getSourceName());
7445f856
NR
211 meta.setUuid(url);
212 meta.setUrl(url);
213 meta.setTitle(title);
214 meta.setAuthor(author);
215 meta.setImageDocument(isImagesDocumentByDefault());
15da4d0a
NR
216
217 InfoReader.completeMeta(tmp, meta);
7445f856 218 }
bb7021f2
NR
219
220 if (meta.getCover() == null) {
221 if (cover != null) {
222 meta.setCover(cover);
223 } else {
076caecc
NR
224 meta.setCover(InfoReader.getCoverByName(
225 getSourceFileOriginal().toURI().toURL()));
bb7021f2
NR
226 }
227 }
7445f856
NR
228 } finally {
229 if (zipIn != null) {
230 zipIn.close();
231 }
232 if (in != null) {
233 in.close();
234 }
08fe2e33 235 }
7445f856
NR
236
237 return null;
08fe2e33
NR
238 }
239
240 @Override
0ffa4754 241 protected void close() {
2aac79c7
NR
242 if (tmpDir != null) {
243 IOUtils.deltree(tmpDir);
08fe2e33
NR
244 }
245
2aac79c7 246 tmpDir = null;
dea63313 247
68686a37 248 super.close();
08fe2e33
NR
249 }
250
251 protected String getDataPrefix() {
252 return "DATA/";
253 }
254
255 protected boolean requireInfo() {
256 return true;
257 }
258
259 protected boolean getCover() {
260 return true;
261 }
e4fa48a0
NR
262
263 protected boolean isImagesDocumentByDefault() {
264 return false;
265 }
08fe2e33 266}