merge with master
[nikiroo-utils.git] / supported / EHentai.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.MalformedURLException;
6 import java.net.URL;
7 import java.util.AbstractMap;
8 import java.util.ArrayList;
9 import java.util.List;
10 import java.util.Map;
11 import java.util.Map.Entry;
12 import java.util.Scanner;
13
14 import be.nikiroo.fanfix.Instance;
15 import be.nikiroo.fanfix.data.Chapter;
16 import be.nikiroo.fanfix.data.MetaData;
17 import be.nikiroo.fanfix.data.Story;
18 import be.nikiroo.utils.Image;
19 import be.nikiroo.utils.Progress;
20 import be.nikiroo.utils.StringUtils;
21
22 /**
23 * Support class for <a href="https://e-hentai.org/">e-hentai.org</a>, a website
24 * supporting mostly but not always NSFW comics, including some of MLP.
25 *
26 * @author niki
27 */
28 class EHentai extends BasicSupport_Deprecated {
29 @Override
30 protected MetaData getMeta(URL source, InputStream in) throws IOException {
31 MetaData meta = new MetaData();
32
33 meta.setTitle(getTitle(reset(in)));
34 meta.setAuthor(getAuthor(reset(in)));
35 meta.setDate(getDate(reset(in)));
36 meta.setTags(getTags(reset(in)));
37 meta.setUrl(source.toString());
38 meta.setUuid(source.toString());
39 meta.setLuid("");
40 meta.setLang(getLang(reset(in)));
41 meta.setSubject("Hentai");
42 meta.setImageDocument(true);
43 meta.setCover(getCover(source, reset(in)));
44 meta.setFakeCover(true);
45
46 return meta;
47 }
48
49 @Override
50 public Story process(URL url, Progress pg) throws IOException {
51 // There is no chapters on e621, just pagination...
52 Story story = super.process(url, pg);
53
54 Chapter only = new Chapter(1, "");
55 for (Chapter chap : story) {
56 only.getParagraphs().addAll(chap.getParagraphs());
57 }
58
59 story.getChapters().clear();
60 story.getChapters().add(only);
61
62 return story;
63 }
64
65 @Override
66 protected boolean supports(URL url) {
67 return "e-hentai.org".equals(url.getHost());
68 }
69
70 @Override
71 protected boolean isHtml() {
72 return true;
73 }
74
75 @Override
76 public Map<String, String> getCookies() {
77 Map<String, String> cookies = super.getCookies();
78 cookies.put("nw", "1");
79 return cookies;
80 }
81
82 private Image getCover(URL source, InputStream in) {
83 Image author = null;
84 String coverLine = getKeyLine(in, "<div id=\"gd1\"", " url(", ")");
85 if (coverLine != null) {
86 coverLine = StringUtils.unhtml(coverLine).trim();
87 author = getImage(this, source, coverLine);
88 }
89
90 return author;
91 }
92
93 private String getAuthor(InputStream in) {
94 String author = null;
95
96 List<String> tagsAuthor = getTagsAuthor(in);
97 if (!tagsAuthor.isEmpty()) {
98 author = tagsAuthor.get(0);
99 }
100
101 return author;
102 }
103
104 private String getLang(InputStream in) {
105 String lang = null;
106
107 String langLine = getKeyLine(in, "class=\"gdt1\">Language",
108 "class=\"gdt2\"", "</td>");
109 if (langLine != null) {
110 langLine = StringUtils.unhtml(langLine).trim();
111 if (langLine.equalsIgnoreCase("English")) {
112 lang = "en";
113 } else if (langLine.equalsIgnoreCase("Japanese")) {
114 lang = "jp";
115 } else if (langLine.equalsIgnoreCase("French")) {
116 lang = "fr";
117 } else {
118 // TODO find the code for other languages?
119 lang = langLine;
120 }
121 }
122
123 return lang;
124 }
125
126 private String getDate(InputStream in) {
127 String date = null;
128
129 String dateLine = getKeyLine(in, "class=\"gdt1\">Posted",
130 "class=\"gdt2\"", "</td>");
131 if (dateLine != null) {
132 dateLine = StringUtils.unhtml(dateLine).trim();
133 if (dateLine.length() > 10) {
134 dateLine = dateLine.substring(0, 10).trim();
135 }
136
137 date = dateLine;
138 }
139
140 return date;
141 }
142
143 private List<String> getTags(InputStream in) {
144 List<String> tags = new ArrayList<String>();
145 List<String> tagsAuthor = getTagsAuthor(in);
146
147 for (int i = 1; i < tagsAuthor.size(); i++) {
148 tags.add(tagsAuthor.get(i));
149 }
150
151 return tags;
152 }
153
154 private List<String> getTagsAuthor(InputStream in) {
155 List<String> tags = new ArrayList<String>();
156 String tagLine = getKeyLine(in, "<meta name=\"description\"", "Tags: ",
157 null);
158 if (tagLine != null) {
159 for (String tag : tagLine.split(",")) {
160 String candi = tag.trim();
161 if (!candi.isEmpty() && !tags.contains(candi)) {
162 tags.add(candi);
163 }
164 }
165 }
166
167 return tags;
168 }
169
170 private String getTitle(InputStream in) {
171 String siteName = " - E-Hentai Galleries";
172
173 String title = getLine(in, "<title>", 0);
174 if (title != null) {
175 title = StringUtils.unhtml(title).trim();
176 if (title.endsWith(siteName)) {
177 title = title.substring(0, title.length() - siteName.length())
178 .trim();
179 }
180 }
181
182 return title;
183 }
184
185 @Override
186 protected String getDesc(URL source, InputStream in) throws IOException {
187 String desc = null;
188
189 String descLine = getKeyLine(in, "Uploader Comment", null,
190 "<div class=\"c7\"");
191 if (descLine != null) {
192 desc = StringUtils.unhtml(descLine);
193 }
194
195 return desc;
196 }
197
198 @Override
199 protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
200 Progress pg) throws IOException {
201 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
202 int last = 0; // no pool/show when only one page, first page == page 0
203
204 @SuppressWarnings("resource")
205 Scanner scan = new Scanner(in, "UTF-8");
206 scan.useDelimiter(">");
207 while (scan.hasNext()) {
208 String line = scan.next();
209 if (line.contains(source.toString())) {
210 String page = line.substring(line.indexOf(source.toString()));
211 String pkey = "?p=";
212 if (page.contains(pkey)) {
213 page = page.substring(page.indexOf(pkey) + pkey.length());
214 String number = "";
215 while (!page.isEmpty() && page.charAt(0) >= '0'
216 && page.charAt(0) <= '9') {
217 number += page.charAt(0);
218 page = page.substring(1);
219 }
220 if (number.isEmpty()) {
221 number = "0";
222 }
223
224 int current = Integer.parseInt(number);
225 if (last < current) {
226 last = current;
227 }
228 }
229 }
230 }
231
232 for (int i = 0; i <= last; i++) {
233 urls.add(new AbstractMap.SimpleEntry<String, URL>(Integer
234 .toString(i + 1), new URL(source.toString() + "?p=" + i)));
235 }
236
237 return urls;
238 }
239
240 @Override
241 protected String getChapterContent(URL source, InputStream in, int number,
242 Progress pg) throws IOException {
243 String staticSite = "https://e-hentai.org/s/";
244 List<URL> pages = new ArrayList<URL>();
245
246 @SuppressWarnings("resource")
247 Scanner scan = new Scanner(in, "UTF-8");
248 scan.useDelimiter("\"");
249 while (scan.hasNext()) {
250 String line = scan.next();
251 if (line.startsWith(staticSite)) {
252 try {
253 pages.add(new URL(line));
254 } catch (MalformedURLException e) {
255 Instance.getInstance().getTraceHandler()
256 .error(new IOException("Parsing error, a link is not correctly parsed: " + line, e));
257 }
258 }
259 }
260
261 if (pg == null) {
262 pg = new Progress();
263 }
264 pg.setMinMax(0, pages.size());
265 pg.setProgress(0);
266
267 StringBuilder builder = new StringBuilder();
268
269 for (URL page : pages) {
270 InputStream pageIn = Instance.getInstance().getCache().open(page, this, false);
271 try {
272 String link = getKeyLine(pageIn, "id=\"img\"", "src=\"", "\"");
273 if (link != null && !link.isEmpty()) {
274 builder.append("[");
275 builder.append(link);
276 builder.append("]<br/>");
277 }
278 pg.add(1);
279 } finally {
280 if (pageIn != null) {
281 pageIn.close();
282 }
283 }
284 }
285
286 pg.done();
287 return builder.toString();
288 }
289 }