fix see word count on source/author, step 2
[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / EHentai.java
CommitLineData
f0608ab1
NR
1package be.nikiroo.fanfix.supported;
2
f0608ab1
NR
3import java.io.IOException;
4import java.io.InputStream;
5import java.net.MalformedURLException;
6import java.net.URL;
ce297a79 7import java.util.AbstractMap;
f0608ab1
NR
8import java.util.ArrayList;
9import java.util.List;
abe90582 10import java.util.Map;
f0608ab1
NR
11import java.util.Map.Entry;
12import java.util.Scanner;
13
14import be.nikiroo.fanfix.Instance;
15import be.nikiroo.fanfix.data.Chapter;
16import be.nikiroo.fanfix.data.MetaData;
17import be.nikiroo.fanfix.data.Story;
16a81ef7 18import be.nikiroo.utils.Image;
f0608ab1
NR
19import be.nikiroo.utils.Progress;
20import be.nikiroo.utils.StringUtils;
21
22/**
23 * Support class for <a href="https://e-hentai.org/">e-hentai.org</a>, a website
24 * supporting mostly but not always NSFW comics, including some of MLP.
25 *
26 * @author niki
27 */
0ffa4754 28class EHentai extends BasicSupport_Deprecated {
f0608ab1
NR
29 @Override
30 public String getSourceName() {
31 return "e-hentai.org";
32 }
33
34 @Override
35 protected MetaData getMeta(URL source, InputStream in) throws IOException {
36 MetaData meta = new MetaData();
37
38 meta.setTitle(getTitle(reset(in)));
39 meta.setAuthor(getAuthor(reset(in)));
416ed13f 40 meta.setDate(getDate(reset(in)));
f0608ab1
NR
41 meta.setTags(getTags(reset(in)));
42 meta.setSource(getSourceName());
43 meta.setUrl(source.toString());
44 meta.setPublisher(getSourceName());
45 meta.setUuid(source.toString());
46 meta.setLuid("");
416ed13f
NR
47 meta.setLang(getLang(reset(in)));
48 meta.setSubject("Hentai");
f0608ab1
NR
49 meta.setType(getType().toString());
50 meta.setImageDocument(true);
51 meta.setCover(getCover(source, reset(in)));
52 meta.setFakeCover(true);
53
54 return meta;
55 }
56
57 @Override
58 public Story process(URL url, Progress pg) throws IOException {
59 // There is no chapters on e621, just pagination...
60 Story story = super.process(url, pg);
61
62 Chapter only = new Chapter(1, null);
63 for (Chapter chap : story) {
64 only.getParagraphs().addAll(chap.getParagraphs());
65 }
66
67 story.getChapters().clear();
68 story.getChapters().add(only);
69
70 return story;
71 }
72
73 @Override
74 protected boolean supports(URL url) {
75 return "e-hentai.org".equals(url.getHost());
76 }
77
78 @Override
79 protected boolean isHtml() {
80 return true;
81 }
82
abe90582
NR
83 @Override
84 public Map<String, String> getCookies() {
abe90582
NR
85 Map<String, String> cookies = super.getCookies();
86 cookies.put("nw", "1");
87 return cookies;
88 }
89
16a81ef7
NR
90 private Image getCover(URL source, InputStream in) {
91 Image author = null;
f0608ab1
NR
92 String coverLine = getKeyLine(in, "<div id=\"gd1\"", " url(", ")");
93 if (coverLine != null) {
94 coverLine = StringUtils.unhtml(coverLine).trim();
95 author = getImage(this, source, coverLine);
96 }
97
98 return author;
99 }
100
101 private String getAuthor(InputStream in) {
102 String author = null;
103
104 List<String> tagsAuthor = getTagsAuthor(in);
105 if (!tagsAuthor.isEmpty()) {
106 author = tagsAuthor.get(0);
107 }
108
109 return author;
110 }
111
416ed13f
NR
112 private String getLang(InputStream in) {
113 String lang = null;
114
115 String langLine = getKeyLine(in, "class=\"gdt1\">Language",
116 "class=\"gdt2\"", "</td>");
117 if (langLine != null) {
118 langLine = StringUtils.unhtml(langLine).trim();
119 if (langLine.equalsIgnoreCase("English")) {
276f95c6 120 lang = "en";
416ed13f 121 } else if (langLine.equalsIgnoreCase("Japanese")) {
276f95c6 122 lang = "jp";
416ed13f 123 } else if (langLine.equalsIgnoreCase("French")) {
276f95c6 124 lang = "fr";
416ed13f
NR
125 } else {
126 // TODO find the code?
127 lang = langLine;
128 }
129 }
130
131 return lang;
132 }
133
134 private String getDate(InputStream in) {
135 String date = null;
136
137 String dateLine = getKeyLine(in, "class=\"gdt1\">Posted",
138 "class=\"gdt2\"", "</td>");
139 if (dateLine != null) {
140 dateLine = StringUtils.unhtml(dateLine).trim();
141 if (dateLine.length() > 10) {
142 dateLine = dateLine.substring(0, 10).trim();
143 }
144
145 date = dateLine;
146 }
147
148 return date;
149 }
150
f0608ab1
NR
151 private List<String> getTags(InputStream in) {
152 List<String> tags = new ArrayList<String>();
153 List<String> tagsAuthor = getTagsAuthor(in);
154
155 for (int i = 1; i < tagsAuthor.size(); i++) {
156 tags.add(tagsAuthor.get(i));
157 }
158
159 return tags;
160 }
161
162 private List<String> getTagsAuthor(InputStream in) {
163 List<String> tags = new ArrayList<String>();
164 String tagLine = getKeyLine(in, "<meta name=\"description\"", "Tags: ",
165 null);
166 if (tagLine != null) {
167 for (String tag : tagLine.split(",")) {
168 String candi = tag.trim();
169 if (!candi.isEmpty() && !tags.contains(candi)) {
170 tags.add(candi);
171 }
172 }
173 }
174
175 return tags;
176 }
177
211f7ddb 178 private String getTitle(InputStream in) {
f0608ab1
NR
179 String siteName = " - E-Hentai Galleries";
180
181 String title = getLine(in, "<title>", 0);
182 if (title != null) {
183 title = StringUtils.unhtml(title).trim();
184 if (title.endsWith(siteName)) {
185 title = title.substring(0, title.length() - siteName.length())
186 .trim();
187 }
188 }
189
190 return title;
191 }
192
193 @Override
194 protected String getDesc(URL source, InputStream in) throws IOException {
195 String desc = null;
196
197 String descLine = getKeyLine(in, "Uploader Comment", null,
198 "<div class=\"c7\"");
199 if (descLine != null) {
200 desc = StringUtils.unhtml(descLine);
201 }
202
203 return desc;
204 }
205
206 @Override
207 protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
208 Progress pg) throws IOException {
209 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
210 int last = 0; // no pool/show when only one page, first page == page 0
211
212 @SuppressWarnings("resource")
213 Scanner scan = new Scanner(in, "UTF-8");
214 scan.useDelimiter(">");
215 while (scan.hasNext()) {
216 String line = scan.next();
217 if (line.contains(source.toString())) {
218 String page = line.substring(line.indexOf(source.toString()));
219 String pkey = "?p=";
220 if (page.contains(pkey)) {
221 page = page.substring(page.indexOf(pkey) + pkey.length());
222 String number = "";
223 while (!page.isEmpty() && page.charAt(0) >= '0'
224 && page.charAt(0) <= '9') {
225 number += page.charAt(0);
226 page = page.substring(1);
227 }
228 if (number.isEmpty()) {
229 number = "0";
230 }
231
232 int current = Integer.parseInt(number);
233 if (last < current) {
234 last = current;
235 }
236 }
237 }
238 }
239
240 for (int i = 0; i <= last; i++) {
ce297a79
NR
241 urls.add(new AbstractMap.SimpleEntry<String, URL>(Integer
242 .toString(i + 1), new URL(source.toString() + "?p=" + i)));
f0608ab1
NR
243 }
244
245 return urls;
246 }
247
248 @Override
249 protected String getChapterContent(URL source, InputStream in, int number,
250 Progress pg) throws IOException {
251 String staticSite = "https://e-hentai.org/s/";
252 List<URL> pages = new ArrayList<URL>();
253
254 @SuppressWarnings("resource")
255 Scanner scan = new Scanner(in, "UTF-8");
256 scan.useDelimiter("\"");
257 while (scan.hasNext()) {
258 String line = scan.next();
259 if (line.startsWith(staticSite)) {
260 try {
261 pages.add(new URL(line));
262 } catch (MalformedURLException e) {
16a81ef7
NR
263 Instance.getTraceHandler().error(
264 new IOException(
265 "Parsing error, a link is not correctly parsed: "
266 + line, e));
f0608ab1
NR
267 }
268 }
269 }
270
271 if (pg == null) {
272 pg = new Progress();
273 }
274 pg.setMinMax(0, pages.size());
275 pg.setProgress(0);
276
277 StringBuilder builder = new StringBuilder();
278
279 for (URL page : pages) {
280 InputStream pageIn = Instance.getCache().open(page, this, false);
281 try {
282 String link = getKeyLine(pageIn, "id=\"img\"", "src=\"", "\"");
283 if (link != null && !link.isEmpty()) {
284 builder.append("[");
285 builder.append(link);
286 builder.append("]<br/>");
287 }
288 pg.add(1);
289 } finally {
290 if (pageIn != null) {
291 pageIn.close();
292 }
293 }
294 }
295
296 pg.done();
297 return builder.toString();
298 }
299}