eHentai content warning fixed
[fanfix.git] / src / be / nikiroo / fanfix / supported / EHentai.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.MalformedURLException;
6 import java.net.URL;
7 import java.util.AbstractMap;
8 import java.util.ArrayList;
9 import java.util.List;
10 import java.util.Map;
11 import java.util.Map.Entry;
12 import java.util.Scanner;
13
14 import be.nikiroo.fanfix.Instance;
15 import be.nikiroo.fanfix.data.Chapter;
16 import be.nikiroo.fanfix.data.MetaData;
17 import be.nikiroo.fanfix.data.Story;
18 import be.nikiroo.utils.Image;
19 import be.nikiroo.utils.Progress;
20 import be.nikiroo.utils.StringUtils;
21
22 /**
23 * Support class for <a href="https://e-hentai.org/">e-hentai.org</a>, a website
24 * supporting mostly but not always NSFW comics, including some of MLP.
25 *
26 * @author niki
27 */
28 class EHentai extends BasicSupport_Deprecated {
29 @Override
30 public String getSourceName() {
31 return "e-hentai.org";
32 }
33
34 @Override
35 protected MetaData getMeta(URL source, InputStream in) throws IOException {
36 MetaData meta = new MetaData();
37
38 meta.setTitle(getTitle(reset(in)));
39 meta.setAuthor(getAuthor(reset(in)));
40 meta.setDate(getDate(reset(in)));
41 meta.setTags(getTags(reset(in)));
42 meta.setSource(getSourceName());
43 meta.setUrl(source.toString());
44 meta.setPublisher(getSourceName());
45 meta.setUuid(source.toString());
46 meta.setLuid("");
47 meta.setLang(getLang(reset(in)));
48 meta.setSubject("Hentai");
49 meta.setType(getType().toString());
50 meta.setImageDocument(true);
51 meta.setCover(getCover(source, reset(in)));
52 meta.setFakeCover(true);
53
54 return meta;
55 }
56
57 @Override
58 public Story process(URL url, Progress pg) throws IOException {
59 // There is no chapters on e621, just pagination...
60 Story story = super.process(url, pg);
61
62 Chapter only = new Chapter(1, null);
63 for (Chapter chap : story) {
64 only.getParagraphs().addAll(chap.getParagraphs());
65 }
66
67 story.getChapters().clear();
68 story.getChapters().add(only);
69
70 return story;
71 }
72
73 @Override
74 protected boolean supports(URL url) {
75 return "e-hentai.org".equals(url.getHost());
76 }
77
78 @Override
79 protected boolean isHtml() {
80 return true;
81 }
82
83 @Override
84 public Map<String, String> getCookies() {
85 // TODO Auto-generated method stub
86 Map<String, String> cookies = super.getCookies();
87 cookies.put("nw", "1");
88 return cookies;
89 }
90
91 private Image getCover(URL source, InputStream in) {
92 Image author = null;
93 String coverLine = getKeyLine(in, "<div id=\"gd1\"", " url(", ")");
94 if (coverLine != null) {
95 coverLine = StringUtils.unhtml(coverLine).trim();
96 author = getImage(this, source, coverLine);
97 }
98
99 return author;
100 }
101
102 private String getAuthor(InputStream in) {
103 String author = null;
104
105 List<String> tagsAuthor = getTagsAuthor(in);
106 if (!tagsAuthor.isEmpty()) {
107 author = tagsAuthor.get(0);
108 }
109
110 return author;
111 }
112
113 private String getLang(InputStream in) {
114 String lang = null;
115
116 String langLine = getKeyLine(in, "class=\"gdt1\">Language",
117 "class=\"gdt2\"", "</td>");
118 if (langLine != null) {
119 langLine = StringUtils.unhtml(langLine).trim();
120 if (langLine.equalsIgnoreCase("English")) {
121 lang = "en";
122 } else if (langLine.equalsIgnoreCase("Japanese")) {
123 lang = "jp";
124 } else if (langLine.equalsIgnoreCase("French")) {
125 lang = "fr";
126 } else {
127 // TODO find the code?
128 lang = langLine;
129 }
130 }
131
132 return lang;
133 }
134
135 private String getDate(InputStream in) {
136 String date = null;
137
138 String dateLine = getKeyLine(in, "class=\"gdt1\">Posted",
139 "class=\"gdt2\"", "</td>");
140 if (dateLine != null) {
141 dateLine = StringUtils.unhtml(dateLine).trim();
142 if (dateLine.length() > 10) {
143 dateLine = dateLine.substring(0, 10).trim();
144 }
145
146 date = dateLine;
147 }
148
149 return date;
150 }
151
152 private List<String> getTags(InputStream in) {
153 List<String> tags = new ArrayList<String>();
154 List<String> tagsAuthor = getTagsAuthor(in);
155
156 for (int i = 1; i < tagsAuthor.size(); i++) {
157 tags.add(tagsAuthor.get(i));
158 }
159
160 return tags;
161 }
162
163 private List<String> getTagsAuthor(InputStream in) {
164 List<String> tags = new ArrayList<String>();
165 String tagLine = getKeyLine(in, "<meta name=\"description\"", "Tags: ",
166 null);
167 if (tagLine != null) {
168 for (String tag : tagLine.split(",")) {
169 String candi = tag.trim();
170 if (!candi.isEmpty() && !tags.contains(candi)) {
171 tags.add(candi);
172 }
173 }
174 }
175
176 return tags;
177 }
178
179 private String getTitle(InputStream in) {
180 String siteName = " - E-Hentai Galleries";
181
182 String title = getLine(in, "<title>", 0);
183 if (title != null) {
184 title = StringUtils.unhtml(title).trim();
185 if (title.endsWith(siteName)) {
186 title = title.substring(0, title.length() - siteName.length())
187 .trim();
188 }
189 }
190
191 return title;
192 }
193
194 @Override
195 protected String getDesc(URL source, InputStream in) throws IOException {
196 String desc = null;
197
198 String descLine = getKeyLine(in, "Uploader Comment", null,
199 "<div class=\"c7\"");
200 if (descLine != null) {
201 desc = StringUtils.unhtml(descLine);
202 }
203
204 return desc;
205 }
206
207 @Override
208 protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
209 Progress pg) throws IOException {
210 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
211 int last = 0; // no pool/show when only one page, first page == page 0
212
213 @SuppressWarnings("resource")
214 Scanner scan = new Scanner(in, "UTF-8");
215 scan.useDelimiter(">");
216 while (scan.hasNext()) {
217 String line = scan.next();
218 if (line.contains(source.toString())) {
219 String page = line.substring(line.indexOf(source.toString()));
220 String pkey = "?p=";
221 if (page.contains(pkey)) {
222 page = page.substring(page.indexOf(pkey) + pkey.length());
223 String number = "";
224 while (!page.isEmpty() && page.charAt(0) >= '0'
225 && page.charAt(0) <= '9') {
226 number += page.charAt(0);
227 page = page.substring(1);
228 }
229 if (number.isEmpty()) {
230 number = "0";
231 }
232
233 int current = Integer.parseInt(number);
234 if (last < current) {
235 last = current;
236 }
237 }
238 }
239 }
240
241 for (int i = 0; i <= last; i++) {
242 urls.add(new AbstractMap.SimpleEntry<String, URL>(Integer
243 .toString(i + 1), new URL(source.toString() + "?p=" + i)));
244 }
245
246 return urls;
247 }
248
249 @Override
250 protected String getChapterContent(URL source, InputStream in, int number,
251 Progress pg) throws IOException {
252 String staticSite = "https://e-hentai.org/s/";
253 List<URL> pages = new ArrayList<URL>();
254
255 @SuppressWarnings("resource")
256 Scanner scan = new Scanner(in, "UTF-8");
257 scan.useDelimiter("\"");
258 while (scan.hasNext()) {
259 String line = scan.next();
260 if (line.startsWith(staticSite)) {
261 try {
262 pages.add(new URL(line));
263 } catch (MalformedURLException e) {
264 Instance.getTraceHandler().error(
265 new IOException(
266 "Parsing error, a link is not correctly parsed: "
267 + line, e));
268 }
269 }
270 }
271
272 if (pg == null) {
273 pg = new Progress();
274 }
275 pg.setMinMax(0, pages.size());
276 pg.setProgress(0);
277
278 StringBuilder builder = new StringBuilder();
279
280 for (URL page : pages) {
281 InputStream pageIn = Instance.getCache().open(page, this, false);
282 try {
283 String link = getKeyLine(pageIn, "id=\"img\"", "src=\"", "\"");
284 if (link != null && !link.isEmpty()) {
285 builder.append("[");
286 builder.append(link);
287 builder.append("]<br/>");
288 }
289 pg.add(1);
290 } finally {
291 if (pageIn != null) {
292 pageIn.close();
293 }
294 }
295 }
296
297 pg.done();
298 return builder.toString();
299 }
300 }