Fix date/language meta data on e-hentai
[fanfix.git] / src / be / nikiroo / fanfix / supported / EHentai.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.awt.image.BufferedImage;
4 import java.io.IOException;
5 import java.io.InputStream;
6 import java.net.MalformedURLException;
7 import java.net.URL;
8 import java.util.ArrayList;
9 import java.util.List;
10 import java.util.Map.Entry;
11 import java.util.Scanner;
12
13 import be.nikiroo.fanfix.Instance;
14 import be.nikiroo.fanfix.data.Chapter;
15 import be.nikiroo.fanfix.data.MetaData;
16 import be.nikiroo.fanfix.data.Story;
17 import be.nikiroo.utils.Progress;
18 import be.nikiroo.utils.StringUtils;
19
20 /**
21 * Support class for <a href="https://e-hentai.org/">e-hentai.org</a>, a website
22 * supporting mostly but not always NSFW comics, including some of MLP.
23 *
24 * @author niki
25 */
26 class EHentai extends BasicSupport {
27 @Override
28 public String getSourceName() {
29 return "e-hentai.org";
30 }
31
32 @Override
33 protected MetaData getMeta(URL source, InputStream in) throws IOException {
34 MetaData meta = new MetaData();
35
36 meta.setTitle(getTitle(reset(in)));
37 meta.setAuthor(getAuthor(reset(in)));
38 meta.setDate(getDate(reset(in)));
39 meta.setTags(getTags(reset(in)));
40 meta.setSource(getSourceName());
41 meta.setUrl(source.toString());
42 meta.setPublisher(getSourceName());
43 meta.setUuid(source.toString());
44 meta.setLuid("");
45 meta.setLang(getLang(reset(in)));
46 meta.setSubject("Hentai");
47 meta.setType(getType().toString());
48 meta.setImageDocument(true);
49 meta.setCover(getCover(source, reset(in)));
50 meta.setFakeCover(true);
51
52 return meta;
53 }
54
55 @Override
56 public Story process(URL url, Progress pg) throws IOException {
57 // There is no chapters on e621, just pagination...
58 Story story = super.process(url, pg);
59
60 Chapter only = new Chapter(1, null);
61 for (Chapter chap : story) {
62 only.getParagraphs().addAll(chap.getParagraphs());
63 }
64
65 story.getChapters().clear();
66 story.getChapters().add(only);
67
68 return story;
69 }
70
71 @Override
72 protected boolean supports(URL url) {
73 return "e-hentai.org".equals(url.getHost());
74 }
75
76 @Override
77 protected boolean isHtml() {
78 return true;
79 }
80
81 private BufferedImage getCover(URL source, InputStream in)
82 throws IOException {
83 BufferedImage author = null;
84 String coverLine = getKeyLine(in, "<div id=\"gd1\"", " url(", ")");
85 if (coverLine != null) {
86 coverLine = StringUtils.unhtml(coverLine).trim();
87 author = getImage(this, source, coverLine);
88 }
89
90 return author;
91 }
92
93 private String getAuthor(InputStream in) {
94 String author = null;
95
96 List<String> tagsAuthor = getTagsAuthor(in);
97 if (!tagsAuthor.isEmpty()) {
98 author = tagsAuthor.get(0);
99 }
100
101 return author;
102 }
103
104 private String getLang(InputStream in) {
105 String lang = null;
106
107 String langLine = getKeyLine(in, "class=\"gdt1\">Language",
108 "class=\"gdt2\"", "</td>");
109 if (langLine != null) {
110 langLine = StringUtils.unhtml(langLine).trim();
111 if (langLine.equalsIgnoreCase("English")) {
112 lang = "EN";
113 } else if (langLine.equalsIgnoreCase("Japanese")) {
114 lang = "JP";
115 } else if (langLine.equalsIgnoreCase("French")) {
116 lang = "FR";
117 } else {
118 // TODO find the code?
119 lang = langLine;
120 }
121 }
122
123 return lang;
124 }
125
126 private String getDate(InputStream in) {
127 String date = null;
128
129 String dateLine = getKeyLine(in, "class=\"gdt1\">Posted",
130 "class=\"gdt2\"", "</td>");
131 if (dateLine != null) {
132 dateLine = StringUtils.unhtml(dateLine).trim();
133 if (dateLine.length() > 10) {
134 dateLine = dateLine.substring(0, 10).trim();
135 }
136
137 date = dateLine;
138 }
139
140 return date;
141 }
142
143 private List<String> getTags(InputStream in) {
144 List<String> tags = new ArrayList<String>();
145 List<String> tagsAuthor = getTagsAuthor(in);
146
147 for (int i = 1; i < tagsAuthor.size(); i++) {
148 tags.add(tagsAuthor.get(i));
149 }
150
151 return tags;
152 }
153
154 private List<String> getTagsAuthor(InputStream in) {
155 List<String> tags = new ArrayList<String>();
156 String tagLine = getKeyLine(in, "<meta name=\"description\"", "Tags: ",
157 null);
158 if (tagLine != null) {
159 for (String tag : tagLine.split(",")) {
160 String candi = tag.trim();
161 if (!candi.isEmpty() && !tags.contains(candi)) {
162 tags.add(candi);
163 }
164 }
165 }
166
167 return tags;
168 }
169
170 private String getTitle(InputStream in) throws IOException {
171 String siteName = " - E-Hentai Galleries";
172
173 String title = getLine(in, "<title>", 0);
174 if (title != null) {
175 title = StringUtils.unhtml(title).trim();
176 if (title.endsWith(siteName)) {
177 title = title.substring(0, title.length() - siteName.length())
178 .trim();
179 }
180 }
181
182 return title;
183 }
184
185 @Override
186 protected String getDesc(URL source, InputStream in) throws IOException {
187 String desc = null;
188
189 String descLine = getKeyLine(in, "Uploader Comment", null,
190 "<div class=\"c7\"");
191 if (descLine != null) {
192 desc = StringUtils.unhtml(descLine);
193 }
194
195 return desc;
196 }
197
198 @Override
199 protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
200 Progress pg) throws IOException {
201 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
202 int last = 0; // no pool/show when only one page, first page == page 0
203
204 @SuppressWarnings("resource")
205 Scanner scan = new Scanner(in, "UTF-8");
206 scan.useDelimiter(">");
207 while (scan.hasNext()) {
208 String line = scan.next();
209 if (line.contains(source.toString())) {
210 String page = line.substring(line.indexOf(source.toString()));
211 String pkey = "?p=";
212 if (page.contains(pkey)) {
213 page = page.substring(page.indexOf(pkey) + pkey.length());
214 String number = "";
215 while (!page.isEmpty() && page.charAt(0) >= '0'
216 && page.charAt(0) <= '9') {
217 number += page.charAt(0);
218 page = page.substring(1);
219 }
220 if (number.isEmpty()) {
221 number = "0";
222 }
223
224 int current = Integer.parseInt(number);
225 if (last < current) {
226 last = current;
227 }
228 }
229 }
230 }
231
232 for (int i = 0; i <= last; i++) {
233 final String key = Integer.toString(i + 1);
234 final URL value = new URL(source.toString() + "?p=" + i);
235 urls.add(new Entry<String, URL>() {
236 public URL setValue(URL value) {
237 return null;
238 }
239
240 public URL getValue() {
241 return value;
242 }
243
244 public String getKey() {
245 return key;
246 }
247 });
248 }
249
250 return urls;
251 }
252
253 @Override
254 protected String getChapterContent(URL source, InputStream in, int number,
255 Progress pg) throws IOException {
256 String staticSite = "https://e-hentai.org/s/";
257 List<URL> pages = new ArrayList<URL>();
258
259 @SuppressWarnings("resource")
260 Scanner scan = new Scanner(in, "UTF-8");
261 scan.useDelimiter("\"");
262 while (scan.hasNext()) {
263 String line = scan.next();
264 if (line.startsWith(staticSite)) {
265 try {
266 pages.add(new URL(line));
267 } catch (MalformedURLException e) {
268 Instance.syserr(new IOException(
269 "Parsing error, a link is not correctly parsed: "
270 + line, e));
271 }
272 }
273 }
274
275 if (pg == null) {
276 pg = new Progress();
277 }
278 pg.setMinMax(0, pages.size());
279 pg.setProgress(0);
280
281 StringBuilder builder = new StringBuilder();
282
283 for (URL page : pages) {
284 InputStream pageIn = Instance.getCache().open(page, this, false);
285 try {
286 String link = getKeyLine(pageIn, "id=\"img\"", "src=\"", "\"");
287 if (link != null && !link.isEmpty()) {
288 builder.append("[");
289 builder.append(link);
290 builder.append("]<br/>");
291 }
292 pg.add(1);
293 } finally {
294 if (pageIn != null) {
295 pageIn.close();
296 }
297 }
298 }
299
300 pg.done();
301 return builder.toString();
302 }
303 }