e5261d36b7a9e291ebfd16b303f22cf51d9b35db
[fanfix.git] / src / be / nikiroo / fanfix / supported / Epub.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.File;
4 import java.io.FileInputStream;
5 import java.io.IOException;
6 import java.io.InputStream;
7 import java.net.URISyntaxException;
8 import java.net.URL;
9 import java.net.URLDecoder;
10 import java.util.ArrayList;
11 import java.util.zip.ZipEntry;
12 import java.util.zip.ZipInputStream;
13
14 import org.jsoup.nodes.Document;
15
16 import be.nikiroo.fanfix.Instance;
17 import be.nikiroo.fanfix.data.MetaData;
18 import be.nikiroo.utils.IOUtils;
19 import be.nikiroo.utils.Image;
20 import be.nikiroo.utils.MarkableFileInputStream;
21 import be.nikiroo.utils.StringUtils;
22
23 /**
24 * Support class for EPUB files created with this program (as we need some
25 * metadata available in those we create).
26 *
27 * @author niki
28 */
29 class Epub extends InfoText {
30 private MetaData meta;
31 private File tmpDir;
32 private String desc;
33
34 private URL fakeSource;
35 private InputStream fakeIn;
36
37 @Override
38 public String getSourceName() {
39 return "epub";
40 }
41
42 public File getSourceFileOriginal() {
43 return super.getSourceFile();
44 }
45
46 @Override
47 protected File getSourceFile() {
48 try {
49 return new File(fakeSource.toURI());
50 } catch (URISyntaxException e) {
51 Instance.getTraceHandler()
52 .error(new IOException(
53 "Cannot get the source file from the info-text URL",
54 e));
55 }
56
57 return null;
58 }
59
60 @Override
61 protected InputStream getInput() {
62 if (fakeIn != null) {
63 try {
64 fakeIn.reset();
65 } catch (IOException e) {
66 Instance.getTraceHandler()
67 .error(new IOException(
68 "Cannot reset the Epub Text stream", e));
69 }
70
71 return fakeIn;
72 }
73
74 return null;
75 }
76
77 @Override
78 protected boolean supports(URL url) {
79 return url.getPath().toLowerCase().endsWith(".epub");
80 }
81
82 @Override
83 protected MetaData getMeta() throws IOException {
84 return meta;
85 }
86
87 @Override
88 protected Document loadDocument(URL source) throws IOException {
89 super.loadDocument(source); // prepares super.getSourceFile() and
90 // super.getInput()
91
92 InputStream in = super.getInput();
93 ZipInputStream zipIn = null;
94 try {
95 zipIn = new ZipInputStream(in);
96 tmpDir = Instance.getTempFiles().createTempDir(
97 "fanfic-reader-parser");
98 File tmp = new File(tmpDir, "file.txt");
99 File tmpInfo = new File(tmpDir, "file.info");
100
101 fakeSource = tmp.toURI().toURL();
102 Image cover = null;
103
104 String url;
105 try {
106 url = getSource().toURI().toURL().toString();
107 } catch (URISyntaxException e1) {
108 url = getSource().toString();
109 }
110 String title = null;
111 String author = null;
112
113 for (ZipEntry entry = zipIn.getNextEntry(); entry != null; entry = zipIn
114 .getNextEntry()) {
115 if (!entry.isDirectory()
116 && entry.getName().startsWith(getDataPrefix())) {
117 String entryLName = entry.getName().toLowerCase();
118
119 boolean imageEntry = false;
120 for (String ext : BasicSupportImages.getImageExt(false)) {
121 if (entryLName.endsWith(ext)) {
122 imageEntry = true;
123 }
124 }
125
126 if (entry.getName().equals(getDataPrefix() + "version")) {
127 // Nothing to do for now ("first"
128 // version is 3.0)
129 } else if (entryLName.endsWith(".info")) {
130 // Info file
131 IOUtils.write(zipIn, tmpInfo);
132 } else if (imageEntry) {
133 // Cover
134 if (getCover()) {
135 try {
136 cover = new Image(zipIn);
137 } catch (Exception e) {
138 Instance.getTraceHandler().error(e);
139 }
140 }
141 } else if (entry.getName().equals(getDataPrefix() + "URL")) {
142 String[] descArray = StringUtils
143 .unhtml(IOUtils.readSmallStream(zipIn)).trim()
144 .split("\n");
145 if (descArray.length > 0) {
146 url = descArray[0].trim();
147 }
148 } else if (entry.getName().equals(
149 getDataPrefix() + "SUMMARY")) {
150 String[] descArray = StringUtils
151 .unhtml(IOUtils.readSmallStream(zipIn)).trim()
152 .split("\n");
153 int skip = 0;
154 if (descArray.length > 1) {
155 title = descArray[0].trim();
156 skip = 1;
157 if (descArray.length > 2
158 && descArray[1].startsWith("©")) {
159 author = descArray[1].substring(1).trim();
160 skip = 2;
161 }
162 }
163 this.desc = "";
164 for (int i = skip; i < descArray.length; i++) {
165 this.desc += descArray[i].trim() + "\n";
166 }
167
168 this.desc = this.desc.trim();
169 } else {
170 // Hopefully the data file
171 IOUtils.write(zipIn, tmp);
172 }
173 }
174 }
175
176 if (requireInfo() && (!tmp.exists() || !tmpInfo.exists())) {
177 throw new IOException(
178 "file not supported (maybe not created with this program or corrupt)");
179 }
180
181 if (tmp.exists()) {
182 this.fakeIn = new MarkableFileInputStream(new FileInputStream(
183 tmp));
184 }
185
186 if (tmpInfo.exists()) {
187 meta = InfoReader.readMeta(tmpInfo, true);
188 tmpInfo.delete();
189 } else {
190 if (title == null || title.isEmpty()) {
191 title = getSourceFileOriginal().getName();
192 if (title.toLowerCase().endsWith(".cbz")) {
193 title = title.substring(0, title.length() - 4);
194 }
195 title = URLDecoder.decode(title, "UTF-8").trim();
196 }
197
198 meta = new MetaData();
199 meta.setLang("en");
200 meta.setTags(new ArrayList<String>());
201 meta.setSource(getSourceName());
202 meta.setUuid(url);
203 meta.setUrl(url);
204 meta.setTitle(title);
205 meta.setAuthor(author);
206 meta.setImageDocument(isImagesDocumentByDefault());
207 }
208
209 if (meta.getCover() == null) {
210 if (cover != null) {
211 meta.setCover(cover);
212 } else {
213 meta.setCover(InfoReader
214 .getCoverByName(getSourceFileOriginal().toURI()
215 .toURL()));
216 }
217 }
218 } finally {
219 if (zipIn != null) {
220 zipIn.close();
221 }
222 if (in != null) {
223 in.close();
224 }
225 }
226
227 return null;
228 }
229
230 @Override
231 protected void close() {
232 if (tmpDir != null) {
233 IOUtils.deltree(tmpDir);
234 }
235
236 tmpDir = null;
237
238 super.close();
239 }
240
241 protected String getDataPrefix() {
242 return "DATA/";
243 }
244
245 protected boolean requireInfo() {
246 return true;
247 }
248
249 protected boolean getCover() {
250 return true;
251 }
252
253 protected boolean isImagesDocumentByDefault() {
254 return false;
255 }
256 }