fix see word count on source/author, step 2
[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / Epub.java
CommitLineData
08fe2e33
NR
1package be.nikiroo.fanfix.supported;
2
08fe2e33
NR
3import java.io.File;
4import java.io.FileInputStream;
5import java.io.IOException;
6import java.io.InputStream;
7445f856 7import java.net.URISyntaxException;
08fe2e33 8import java.net.URL;
778d8d85 9import java.net.URLDecoder;
68686a37 10import java.util.ArrayList;
08fe2e33
NR
11import java.util.zip.ZipEntry;
12import java.util.zip.ZipInputStream;
13
7445f856
NR
14import org.jsoup.nodes.Document;
15
08fe2e33 16import be.nikiroo.fanfix.Instance;
68686a37 17import be.nikiroo.fanfix.data.MetaData;
08fe2e33 18import be.nikiroo.utils.IOUtils;
16a81ef7 19import be.nikiroo.utils.Image;
08fe2e33 20import be.nikiroo.utils.MarkableFileInputStream;
b7afbe42 21import be.nikiroo.utils.StringUtils;
08fe2e33
NR
22
23/**
24 * Support class for EPUB files created with this program (as we need some
25 * metadata available in those we create).
26 *
27 * @author niki
28 */
68686a37 29class Epub extends InfoText {
7445f856 30 private MetaData meta;
2aac79c7 31 private File tmpDir;
b7afbe42 32 private String desc;
08fe2e33 33
68686a37
NR
34 private URL fakeSource;
35 private InputStream fakeIn;
08fe2e33
NR
36
37 @Override
38 public String getSourceName() {
39 return "epub";
40 }
41
7445f856 42 public File getSourceFileOriginal() {
298d405a 43 return super.getSourceFile();
08fe2e33
NR
44 }
45
46 @Override
7445f856
NR
47 protected File getSourceFile() {
48 try {
49 return new File(fakeSource.toURI());
50 } catch (URISyntaxException e) {
51 Instance.getTraceHandler()
52 .error(new IOException(
53 "Cannot get the source file from the info-text URL",
54 e));
08fe2e33
NR
55 }
56
57 return null;
58 }
59
60 @Override
7445f856 61 protected InputStream getInput() {
298d405a
NR
62 if (fakeIn != null) {
63 try {
64 fakeIn.reset();
65 } catch (IOException e) {
66 Instance.getTraceHandler()
67 .error(new IOException(
68 "Cannot reset the Epub Text stream", e));
69 }
70
71 return fakeIn;
72 }
73
74 return null;
08fe2e33
NR
75 }
76
77 @Override
7445f856
NR
78 protected boolean supports(URL url) {
79 return url.getPath().toLowerCase().endsWith(".epub");
80 }
08fe2e33 81
7445f856
NR
82 @Override
83 protected MetaData getMeta() throws IOException {
84 return meta;
08fe2e33
NR
85 }
86
87 @Override
7445f856
NR
88 protected Document loadDocument(URL source) throws IOException {
89 super.loadDocument(source); // prepares super.getSourceFile() and
90 // super.getInput()
91
92 InputStream in = super.getInput();
93 ZipInputStream zipIn = null;
94 try {
95 zipIn = new ZipInputStream(in);
96 tmpDir = Instance.getTempFiles().createTempDir(
97 "fanfic-reader-parser");
98 File tmp = new File(tmpDir, "file.txt");
99 File tmpInfo = new File(tmpDir, "file.info");
100
101 fakeSource = tmp.toURI().toURL();
102 Image cover = null;
103
104 String url;
105 try {
106 url = getSource().toURI().toURL().toString();
107 } catch (URISyntaxException e1) {
108 url = getSource().toString();
109 }
110 String title = null;
111 String author = null;
112
113 for (ZipEntry entry = zipIn.getNextEntry(); entry != null; entry = zipIn
114 .getNextEntry()) {
115 if (!entry.isDirectory()
116 && entry.getName().startsWith(getDataPrefix())) {
117 String entryLName = entry.getName().toLowerCase();
118
119 boolean imageEntry = false;
120 for (String ext : BasicSupportImages.getImageExt(false)) {
121 if (entryLName.endsWith(ext)) {
122 imageEntry = true;
123 }
08fe2e33 124 }
08fe2e33 125
7445f856
NR
126 if (entry.getName().equals(getDataPrefix() + "version")) {
127 // Nothing to do for now ("first"
128 // version is 3.0)
129 } else if (entryLName.endsWith(".info")) {
130 // Info file
131 IOUtils.write(zipIn, tmpInfo);
132 } else if (imageEntry) {
133 // Cover
134 if (getCover()) {
135 try {
136 cover = new Image(zipIn);
137 } catch (Exception e) {
138 Instance.getTraceHandler().error(e);
139 }
08fe2e33 140 }
7445f856
NR
141 } else if (entry.getName().equals(getDataPrefix() + "URL")) {
142 String[] descArray = StringUtils
143 .unhtml(IOUtils.readSmallStream(zipIn)).trim()
144 .split("\n");
145 if (descArray.length > 0) {
146 url = descArray[0].trim();
147 }
148 } else if (entry.getName().equals(
149 getDataPrefix() + "SUMMARY")) {
150 String[] descArray = StringUtils
151 .unhtml(IOUtils.readSmallStream(zipIn)).trim()
152 .split("\n");
153 int skip = 0;
154 if (descArray.length > 1) {
155 title = descArray[0].trim();
156 skip = 1;
157 if (descArray.length > 2
158 && descArray[1].startsWith("©")) {
159 author = descArray[1].substring(1).trim();
160 skip = 2;
161 }
162 }
163 this.desc = "";
164 for (int i = skip; i < descArray.length; i++) {
165 this.desc += descArray[i].trim() + "\n";
b7afbe42 166 }
b7afbe42 167
7445f856
NR
168 this.desc = this.desc.trim();
169 } else {
170 // Hopefully the data file
171 IOUtils.write(zipIn, tmp);
172 }
08fe2e33
NR
173 }
174 }
08fe2e33 175
7445f856
NR
176 if (requireInfo() && (!tmp.exists() || !tmpInfo.exists())) {
177 throw new IOException(
178 "file not supported (maybe not created with this program or corrupt)");
179 }
68686a37 180
7445f856
NR
181 if (tmp.exists()) {
182 this.fakeIn = new MarkableFileInputStream(new FileInputStream(
183 tmp));
68686a37 184 }
7445f856
NR
185
186 if (tmpInfo.exists()) {
187 meta = InfoReader.readMeta(tmpInfo, true);
7445f856
NR
188 tmpInfo.delete();
189 } else {
190 if (title == null || title.isEmpty()) {
191 title = getSourceFileOriginal().getName();
192 if (title.toLowerCase().endsWith(".cbz")) {
193 title = title.substring(0, title.length() - 4);
194 }
195 title = URLDecoder.decode(title, "UTF-8").trim();
778d8d85 196 }
778d8d85 197
7445f856
NR
198 meta = new MetaData();
199 meta.setLang("en");
200 meta.setTags(new ArrayList<String>());
201 meta.setSource(getSourceName());
202 meta.setUuid(url);
203 meta.setUrl(url);
204 meta.setTitle(title);
205 meta.setAuthor(author);
206 meta.setImageDocument(isImagesDocumentByDefault());
207 }
bb7021f2
NR
208
209 if (meta.getCover() == null) {
210 if (cover != null) {
211 meta.setCover(cover);
212 } else {
bb7021f2
NR
213 meta.setCover(InfoReader
214 .getCoverByName(getSourceFileOriginal().toURI()
215 .toURL()));
216 }
217 }
7445f856
NR
218 } finally {
219 if (zipIn != null) {
220 zipIn.close();
221 }
222 if (in != null) {
223 in.close();
224 }
08fe2e33 225 }
7445f856
NR
226
227 return null;
08fe2e33
NR
228 }
229
230 @Override
0ffa4754 231 protected void close() {
2aac79c7
NR
232 if (tmpDir != null) {
233 IOUtils.deltree(tmpDir);
08fe2e33
NR
234 }
235
2aac79c7 236 tmpDir = null;
dea63313 237
68686a37 238 super.close();
08fe2e33
NR
239 }
240
241 protected String getDataPrefix() {
242 return "DATA/";
243 }
244
245 protected boolean requireInfo() {
246 return true;
247 }
248
249 protected boolean getCover() {
250 return true;
251 }
e4fa48a0
NR
252
253 protected boolean isImagesDocumentByDefault() {
254 return false;
255 }
08fe2e33 256}