fix changelog headers
[fanfix.git] / EHentai.java
... / ...
CommitLineData
package be.nikiroo.fanfix.supported;

import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.List;
import java.util.Map.Entry;
import java.util.Scanner;

import be.nikiroo.fanfix.Instance;
import be.nikiroo.fanfix.data.Chapter;
import be.nikiroo.fanfix.data.MetaData;
import be.nikiroo.fanfix.data.Story;
import be.nikiroo.utils.Image;
import be.nikiroo.utils.Progress;
import be.nikiroo.utils.StringUtils;

21/**
22 * Support class for <a href="https://e-hentai.org/">e-hentai.org</a>, a website
23 * supporting mostly but not always NSFW comics, including some of MLP.
24 *
25 * @author niki
26 */
27class EHentai extends BasicSupport_Deprecated {
28 @Override
29 public String getSourceName() {
30 return "e-hentai.org";
31 }
32
33 @Override
34 protected MetaData getMeta(URL source, InputStream in) throws IOException {
35 MetaData meta = new MetaData();
36
37 meta.setTitle(getTitle(reset(in)));
38 meta.setAuthor(getAuthor(reset(in)));
39 meta.setDate(getDate(reset(in)));
40 meta.setTags(getTags(reset(in)));
41 meta.setSource(getSourceName());
42 meta.setUrl(source.toString());
43 meta.setPublisher(getSourceName());
44 meta.setUuid(source.toString());
45 meta.setLuid("");
46 meta.setLang(getLang(reset(in)));
47 meta.setSubject("Hentai");
48 meta.setType(getType().toString());
49 meta.setImageDocument(true);
50 meta.setCover(getCover(source, reset(in)));
51 meta.setFakeCover(true);
52
53 return meta;
54 }
55
56 @Override
57 public Story process(URL url, Progress pg) throws IOException {
58 // There is no chapters on e621, just pagination...
59 Story story = super.process(url, pg);
60
61 Chapter only = new Chapter(1, null);
62 for (Chapter chap : story) {
63 only.getParagraphs().addAll(chap.getParagraphs());
64 }
65
66 story.getChapters().clear();
67 story.getChapters().add(only);
68
69 return story;
70 }
71
72 @Override
73 protected boolean supports(URL url) {
74 return "e-hentai.org".equals(url.getHost());
75 }
76
77 @Override
78 protected boolean isHtml() {
79 return true;
80 }
81
82 private Image getCover(URL source, InputStream in) {
83 Image author = null;
84 String coverLine = getKeyLine(in, "<div id=\"gd1\"", " url(", ")");
85 if (coverLine != null) {
86 coverLine = StringUtils.unhtml(coverLine).trim();
87 author = getImage(this, source, coverLine);
88 }
89
90 return author;
91 }
92
93 private String getAuthor(InputStream in) {
94 String author = null;
95
96 List<String> tagsAuthor = getTagsAuthor(in);
97 if (!tagsAuthor.isEmpty()) {
98 author = tagsAuthor.get(0);
99 }
100
101 return author;
102 }
103
104 private String getLang(InputStream in) {
105 String lang = null;
106
107 String langLine = getKeyLine(in, "class=\"gdt1\">Language",
108 "class=\"gdt2\"", "</td>");
109 if (langLine != null) {
110 langLine = StringUtils.unhtml(langLine).trim();
111 if (langLine.equalsIgnoreCase("English")) {
112 lang = "en";
113 } else if (langLine.equalsIgnoreCase("Japanese")) {
114 lang = "jp";
115 } else if (langLine.equalsIgnoreCase("French")) {
116 lang = "fr";
117 } else {
118 // TODO find the code?
119 lang = langLine;
120 }
121 }
122
123 return lang;
124 }
125
126 private String getDate(InputStream in) {
127 String date = null;
128
129 String dateLine = getKeyLine(in, "class=\"gdt1\">Posted",
130 "class=\"gdt2\"", "</td>");
131 if (dateLine != null) {
132 dateLine = StringUtils.unhtml(dateLine).trim();
133 if (dateLine.length() > 10) {
134 dateLine = dateLine.substring(0, 10).trim();
135 }
136
137 date = dateLine;
138 }
139
140 return date;
141 }
142
143 private List<String> getTags(InputStream in) {
144 List<String> tags = new ArrayList<String>();
145 List<String> tagsAuthor = getTagsAuthor(in);
146
147 for (int i = 1; i < tagsAuthor.size(); i++) {
148 tags.add(tagsAuthor.get(i));
149 }
150
151 return tags;
152 }
153
154 private List<String> getTagsAuthor(InputStream in) {
155 List<String> tags = new ArrayList<String>();
156 String tagLine = getKeyLine(in, "<meta name=\"description\"", "Tags: ",
157 null);
158 if (tagLine != null) {
159 for (String tag : tagLine.split(",")) {
160 String candi = tag.trim();
161 if (!candi.isEmpty() && !tags.contains(candi)) {
162 tags.add(candi);
163 }
164 }
165 }
166
167 return tags;
168 }
169
170 private String getTitle(InputStream in) {
171 String siteName = " - E-Hentai Galleries";
172
173 String title = getLine(in, "<title>", 0);
174 if (title != null) {
175 title = StringUtils.unhtml(title).trim();
176 if (title.endsWith(siteName)) {
177 title = title.substring(0, title.length() - siteName.length())
178 .trim();
179 }
180 }
181
182 return title;
183 }
184
185 @Override
186 protected String getDesc(URL source, InputStream in) throws IOException {
187 String desc = null;
188
189 String descLine = getKeyLine(in, "Uploader Comment", null,
190 "<div class=\"c7\"");
191 if (descLine != null) {
192 desc = StringUtils.unhtml(descLine);
193 }
194
195 return desc;
196 }
197
198 @Override
199 protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
200 Progress pg) throws IOException {
201 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
202 int last = 0; // no pool/show when only one page, first page == page 0
203
204 @SuppressWarnings("resource")
205 Scanner scan = new Scanner(in, "UTF-8");
206 scan.useDelimiter(">");
207 while (scan.hasNext()) {
208 String line = scan.next();
209 if (line.contains(source.toString())) {
210 String page = line.substring(line.indexOf(source.toString()));
211 String pkey = "?p=";
212 if (page.contains(pkey)) {
213 page = page.substring(page.indexOf(pkey) + pkey.length());
214 String number = "";
215 while (!page.isEmpty() && page.charAt(0) >= '0'
216 && page.charAt(0) <= '9') {
217 number += page.charAt(0);
218 page = page.substring(1);
219 }
220 if (number.isEmpty()) {
221 number = "0";
222 }
223
224 int current = Integer.parseInt(number);
225 if (last < current) {
226 last = current;
227 }
228 }
229 }
230 }
231
232 for (int i = 0; i <= last; i++) {
233 urls.add(new AbstractMap.SimpleEntry<String, URL>(Integer
234 .toString(i + 1), new URL(source.toString() + "?p=" + i)));
235 }
236
237 return urls;
238 }
239
240 @Override
241 protected String getChapterContent(URL source, InputStream in, int number,
242 Progress pg) throws IOException {
243 String staticSite = "https://e-hentai.org/s/";
244 List<URL> pages = new ArrayList<URL>();
245
246 @SuppressWarnings("resource")
247 Scanner scan = new Scanner(in, "UTF-8");
248 scan.useDelimiter("\"");
249 while (scan.hasNext()) {
250 String line = scan.next();
251 if (line.startsWith(staticSite)) {
252 try {
253 pages.add(new URL(line));
254 } catch (MalformedURLException e) {
255 Instance.getTraceHandler().error(
256 new IOException(
257 "Parsing error, a link is not correctly parsed: "
258 + line, e));
259 }
260 }
261 }
262
263 if (pg == null) {
264 pg = new Progress();
265 }
266 pg.setMinMax(0, pages.size());
267 pg.setProgress(0);
268
269 StringBuilder builder = new StringBuilder();
270
271 for (URL page : pages) {
272 InputStream pageIn = Instance.getCache().open(page, this, false);
273 try {
274 String link = getKeyLine(pageIn, "id=\"img\"", "src=\"", "\"");
275 if (link != null && !link.isEmpty()) {
276 builder.append("[");
277 builder.append(link);
278 builder.append("]<br/>");
279 }
280 pg.add(1);
281 } finally {
282 if (pageIn != null) {
283 pageIn.close();
284 }
285 }
286 }
287
288 pg.done();
289 return builder.toString();
290 }
291}