e621: better title
[fanfix.git] / supported / EHentai.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.MalformedURLException;
6 import java.net.URL;
7 import java.util.AbstractMap;
8 import java.util.ArrayList;
9 import java.util.List;
10 import java.util.Map;
11 import java.util.Map.Entry;
12 import java.util.Scanner;
13
14 import be.nikiroo.fanfix.Instance;
15 import be.nikiroo.fanfix.data.Chapter;
16 import be.nikiroo.fanfix.data.MetaData;
17 import be.nikiroo.fanfix.data.Story;
18 import be.nikiroo.utils.Image;
19 import be.nikiroo.utils.Progress;
20 import be.nikiroo.utils.StringUtils;
21
22 /**
23 * Support class for <a href="https://e-hentai.org/">e-hentai.org</a>, a website
24 * supporting mostly but not always NSFW comics, including some of MLP.
25 *
26 * @author niki
27 */
28 class EHentai extends BasicSupport_Deprecated {
29 @Override
30 protected MetaData getMeta(URL source, InputStream in) throws IOException {
31 MetaData meta = new MetaData();
32
33 meta.setTitle(getTitle(reset(in)));
34 meta.setAuthor(getAuthor(reset(in)));
35 meta.setDate(getDate(reset(in)));
36 meta.setTags(getTags(reset(in)));
37 meta.setSource(getType().getSourceName());
38 meta.setUrl(source.toString());
39 meta.setPublisher(getType().getSourceName());
40 meta.setUuid(source.toString());
41 meta.setLuid("");
42 meta.setLang(getLang(reset(in)));
43 meta.setSubject("Hentai");
44 meta.setType(getType().toString());
45 meta.setImageDocument(true);
46 meta.setCover(getCover(source, reset(in)));
47 meta.setFakeCover(true);
48
49 return meta;
50 }
51
52 @Override
53 public Story process(URL url, Progress pg) throws IOException {
54 // There is no chapters on e621, just pagination...
55 Story story = super.process(url, pg);
56
57 Chapter only = new Chapter(1, null);
58 for (Chapter chap : story) {
59 only.getParagraphs().addAll(chap.getParagraphs());
60 }
61
62 story.getChapters().clear();
63 story.getChapters().add(only);
64
65 return story;
66 }
67
68 @Override
69 protected boolean supports(URL url) {
70 return "e-hentai.org".equals(url.getHost());
71 }
72
73 @Override
74 protected boolean isHtml() {
75 return true;
76 }
77
78 @Override
79 public Map<String, String> getCookies() {
80 Map<String, String> cookies = super.getCookies();
81 cookies.put("nw", "1");
82 return cookies;
83 }
84
85 private Image getCover(URL source, InputStream in) {
86 Image author = null;
87 String coverLine = getKeyLine(in, "<div id=\"gd1\"", " url(", ")");
88 if (coverLine != null) {
89 coverLine = StringUtils.unhtml(coverLine).trim();
90 author = getImage(this, source, coverLine);
91 }
92
93 return author;
94 }
95
96 private String getAuthor(InputStream in) {
97 String author = null;
98
99 List<String> tagsAuthor = getTagsAuthor(in);
100 if (!tagsAuthor.isEmpty()) {
101 author = tagsAuthor.get(0);
102 }
103
104 return author;
105 }
106
107 private String getLang(InputStream in) {
108 String lang = null;
109
110 String langLine = getKeyLine(in, "class=\"gdt1\">Language",
111 "class=\"gdt2\"", "</td>");
112 if (langLine != null) {
113 langLine = StringUtils.unhtml(langLine).trim();
114 if (langLine.equalsIgnoreCase("English")) {
115 lang = "en";
116 } else if (langLine.equalsIgnoreCase("Japanese")) {
117 lang = "jp";
118 } else if (langLine.equalsIgnoreCase("French")) {
119 lang = "fr";
120 } else {
121 // TODO find the code?
122 lang = langLine;
123 }
124 }
125
126 return lang;
127 }
128
129 private String getDate(InputStream in) {
130 String date = null;
131
132 String dateLine = getKeyLine(in, "class=\"gdt1\">Posted",
133 "class=\"gdt2\"", "</td>");
134 if (dateLine != null) {
135 dateLine = StringUtils.unhtml(dateLine).trim();
136 if (dateLine.length() > 10) {
137 dateLine = dateLine.substring(0, 10).trim();
138 }
139
140 date = dateLine;
141 }
142
143 return date;
144 }
145
146 private List<String> getTags(InputStream in) {
147 List<String> tags = new ArrayList<String>();
148 List<String> tagsAuthor = getTagsAuthor(in);
149
150 for (int i = 1; i < tagsAuthor.size(); i++) {
151 tags.add(tagsAuthor.get(i));
152 }
153
154 return tags;
155 }
156
157 private List<String> getTagsAuthor(InputStream in) {
158 List<String> tags = new ArrayList<String>();
159 String tagLine = getKeyLine(in, "<meta name=\"description\"", "Tags: ",
160 null);
161 if (tagLine != null) {
162 for (String tag : tagLine.split(",")) {
163 String candi = tag.trim();
164 if (!candi.isEmpty() && !tags.contains(candi)) {
165 tags.add(candi);
166 }
167 }
168 }
169
170 return tags;
171 }
172
173 private String getTitle(InputStream in) {
174 String siteName = " - E-Hentai Galleries";
175
176 String title = getLine(in, "<title>", 0);
177 if (title != null) {
178 title = StringUtils.unhtml(title).trim();
179 if (title.endsWith(siteName)) {
180 title = title.substring(0, title.length() - siteName.length())
181 .trim();
182 }
183 }
184
185 return title;
186 }
187
188 @Override
189 protected String getDesc(URL source, InputStream in) throws IOException {
190 String desc = null;
191
192 String descLine = getKeyLine(in, "Uploader Comment", null,
193 "<div class=\"c7\"");
194 if (descLine != null) {
195 desc = StringUtils.unhtml(descLine);
196 }
197
198 return desc;
199 }
200
201 @Override
202 protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
203 Progress pg) throws IOException {
204 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
205 int last = 0; // no pool/show when only one page, first page == page 0
206
207 @SuppressWarnings("resource")
208 Scanner scan = new Scanner(in, "UTF-8");
209 scan.useDelimiter(">");
210 while (scan.hasNext()) {
211 String line = scan.next();
212 if (line.contains(source.toString())) {
213 String page = line.substring(line.indexOf(source.toString()));
214 String pkey = "?p=";
215 if (page.contains(pkey)) {
216 page = page.substring(page.indexOf(pkey) + pkey.length());
217 String number = "";
218 while (!page.isEmpty() && page.charAt(0) >= '0'
219 && page.charAt(0) <= '9') {
220 number += page.charAt(0);
221 page = page.substring(1);
222 }
223 if (number.isEmpty()) {
224 number = "0";
225 }
226
227 int current = Integer.parseInt(number);
228 if (last < current) {
229 last = current;
230 }
231 }
232 }
233 }
234
235 for (int i = 0; i <= last; i++) {
236 urls.add(new AbstractMap.SimpleEntry<String, URL>(Integer
237 .toString(i + 1), new URL(source.toString() + "?p=" + i)));
238 }
239
240 return urls;
241 }
242
243 @Override
244 protected String getChapterContent(URL source, InputStream in, int number,
245 Progress pg) throws IOException {
246 String staticSite = "https://e-hentai.org/s/";
247 List<URL> pages = new ArrayList<URL>();
248
249 @SuppressWarnings("resource")
250 Scanner scan = new Scanner(in, "UTF-8");
251 scan.useDelimiter("\"");
252 while (scan.hasNext()) {
253 String line = scan.next();
254 if (line.startsWith(staticSite)) {
255 try {
256 pages.add(new URL(line));
257 } catch (MalformedURLException e) {
258 Instance.getInstance().getTraceHandler()
259 .error(new IOException("Parsing error, a link is not correctly parsed: " + line, e));
260 }
261 }
262 }
263
264 if (pg == null) {
265 pg = new Progress();
266 }
267 pg.setMinMax(0, pages.size());
268 pg.setProgress(0);
269
270 StringBuilder builder = new StringBuilder();
271
272 for (URL page : pages) {
273 InputStream pageIn = Instance.getInstance().getCache().open(page, this, false);
274 try {
275 String link = getKeyLine(pageIn, "id=\"img\"", "src=\"", "\"");
276 if (link != null && !link.isEmpty()) {
277 builder.append("[");
278 builder.append(link);
279 builder.append("]<br/>");
280 }
281 pg.add(1);
282 } finally {
283 if (pageIn != null) {
284 pageIn.close();
285 }
286 }
287 }
288
289 pg.done();
290 return builder.toString();
291 }
292 }