Small fixes for epub:
[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / EHentai.java
CommitLineData
f0608ab1
NR
1package be.nikiroo.fanfix.supported;
2
3import java.awt.image.BufferedImage;
4import java.io.IOException;
5import java.io.InputStream;
6import java.net.MalformedURLException;
7import java.net.URL;
8import java.util.ArrayList;
9import java.util.List;
10import java.util.Map.Entry;
11import java.util.Scanner;
12
13import be.nikiroo.fanfix.Instance;
14import be.nikiroo.fanfix.data.Chapter;
15import be.nikiroo.fanfix.data.MetaData;
16import be.nikiroo.fanfix.data.Story;
17import be.nikiroo.utils.Progress;
18import be.nikiroo.utils.StringUtils;
19
20/**
21 * Support class for <a href="https://e-hentai.org/">e-hentai.org</a>, a website
22 * supporting mostly but not always NSFW comics, including some of MLP.
23 *
24 * @author niki
25 */
26class EHentai extends BasicSupport {
27 @Override
28 public String getSourceName() {
29 return "e-hentai.org";
30 }
31
32 @Override
33 protected MetaData getMeta(URL source, InputStream in) throws IOException {
34 MetaData meta = new MetaData();
35
36 meta.setTitle(getTitle(reset(in)));
37 meta.setAuthor(getAuthor(reset(in)));
416ed13f 38 meta.setDate(getDate(reset(in)));
f0608ab1
NR
39 meta.setTags(getTags(reset(in)));
40 meta.setSource(getSourceName());
41 meta.setUrl(source.toString());
42 meta.setPublisher(getSourceName());
43 meta.setUuid(source.toString());
44 meta.setLuid("");
416ed13f
NR
45 meta.setLang(getLang(reset(in)));
46 meta.setSubject("Hentai");
f0608ab1
NR
47 meta.setType(getType().toString());
48 meta.setImageDocument(true);
49 meta.setCover(getCover(source, reset(in)));
50 meta.setFakeCover(true);
51
52 return meta;
53 }
54
55 @Override
56 public Story process(URL url, Progress pg) throws IOException {
57 // There is no chapters on e621, just pagination...
58 Story story = super.process(url, pg);
59
60 Chapter only = new Chapter(1, null);
61 for (Chapter chap : story) {
62 only.getParagraphs().addAll(chap.getParagraphs());
63 }
64
65 story.getChapters().clear();
66 story.getChapters().add(only);
67
68 return story;
69 }
70
71 @Override
72 protected boolean supports(URL url) {
73 return "e-hentai.org".equals(url.getHost());
74 }
75
76 @Override
77 protected boolean isHtml() {
78 return true;
79 }
80
211f7ddb 81 private BufferedImage getCover(URL source, InputStream in) {
f0608ab1
NR
82 BufferedImage author = null;
83 String coverLine = getKeyLine(in, "<div id=\"gd1\"", " url(", ")");
84 if (coverLine != null) {
85 coverLine = StringUtils.unhtml(coverLine).trim();
86 author = getImage(this, source, coverLine);
87 }
88
89 return author;
90 }
91
92 private String getAuthor(InputStream in) {
93 String author = null;
94
95 List<String> tagsAuthor = getTagsAuthor(in);
96 if (!tagsAuthor.isEmpty()) {
97 author = tagsAuthor.get(0);
98 }
99
100 return author;
101 }
102
416ed13f
NR
103 private String getLang(InputStream in) {
104 String lang = null;
105
106 String langLine = getKeyLine(in, "class=\"gdt1\">Language",
107 "class=\"gdt2\"", "</td>");
108 if (langLine != null) {
109 langLine = StringUtils.unhtml(langLine).trim();
110 if (langLine.equalsIgnoreCase("English")) {
111 lang = "EN";
112 } else if (langLine.equalsIgnoreCase("Japanese")) {
113 lang = "JP";
114 } else if (langLine.equalsIgnoreCase("French")) {
115 lang = "FR";
116 } else {
117 // TODO find the code?
118 lang = langLine;
119 }
120 }
121
122 return lang;
123 }
124
125 private String getDate(InputStream in) {
126 String date = null;
127
128 String dateLine = getKeyLine(in, "class=\"gdt1\">Posted",
129 "class=\"gdt2\"", "</td>");
130 if (dateLine != null) {
131 dateLine = StringUtils.unhtml(dateLine).trim();
132 if (dateLine.length() > 10) {
133 dateLine = dateLine.substring(0, 10).trim();
134 }
135
136 date = dateLine;
137 }
138
139 return date;
140 }
141
f0608ab1
NR
142 private List<String> getTags(InputStream in) {
143 List<String> tags = new ArrayList<String>();
144 List<String> tagsAuthor = getTagsAuthor(in);
145
146 for (int i = 1; i < tagsAuthor.size(); i++) {
147 tags.add(tagsAuthor.get(i));
148 }
149
150 return tags;
151 }
152
153 private List<String> getTagsAuthor(InputStream in) {
154 List<String> tags = new ArrayList<String>();
155 String tagLine = getKeyLine(in, "<meta name=\"description\"", "Tags: ",
156 null);
157 if (tagLine != null) {
158 for (String tag : tagLine.split(",")) {
159 String candi = tag.trim();
160 if (!candi.isEmpty() && !tags.contains(candi)) {
161 tags.add(candi);
162 }
163 }
164 }
165
166 return tags;
167 }
168
211f7ddb 169 private String getTitle(InputStream in) {
f0608ab1
NR
170 String siteName = " - E-Hentai Galleries";
171
172 String title = getLine(in, "<title>", 0);
173 if (title != null) {
174 title = StringUtils.unhtml(title).trim();
175 if (title.endsWith(siteName)) {
176 title = title.substring(0, title.length() - siteName.length())
177 .trim();
178 }
179 }
180
181 return title;
182 }
183
184 @Override
185 protected String getDesc(URL source, InputStream in) throws IOException {
186 String desc = null;
187
188 String descLine = getKeyLine(in, "Uploader Comment", null,
189 "<div class=\"c7\"");
190 if (descLine != null) {
191 desc = StringUtils.unhtml(descLine);
192 }
193
194 return desc;
195 }
196
197 @Override
198 protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
199 Progress pg) throws IOException {
200 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
201 int last = 0; // no pool/show when only one page, first page == page 0
202
203 @SuppressWarnings("resource")
204 Scanner scan = new Scanner(in, "UTF-8");
205 scan.useDelimiter(">");
206 while (scan.hasNext()) {
207 String line = scan.next();
208 if (line.contains(source.toString())) {
209 String page = line.substring(line.indexOf(source.toString()));
210 String pkey = "?p=";
211 if (page.contains(pkey)) {
212 page = page.substring(page.indexOf(pkey) + pkey.length());
213 String number = "";
214 while (!page.isEmpty() && page.charAt(0) >= '0'
215 && page.charAt(0) <= '9') {
216 number += page.charAt(0);
217 page = page.substring(1);
218 }
219 if (number.isEmpty()) {
220 number = "0";
221 }
222
223 int current = Integer.parseInt(number);
224 if (last < current) {
225 last = current;
226 }
227 }
228 }
229 }
230
231 for (int i = 0; i <= last; i++) {
232 final String key = Integer.toString(i + 1);
233 final URL value = new URL(source.toString() + "?p=" + i);
234 urls.add(new Entry<String, URL>() {
211f7ddb 235 @Override
f0608ab1
NR
236 public URL setValue(URL value) {
237 return null;
238 }
239
211f7ddb 240 @Override
f0608ab1
NR
241 public URL getValue() {
242 return value;
243 }
244
211f7ddb 245 @Override
f0608ab1
NR
246 public String getKey() {
247 return key;
248 }
249 });
250 }
251
252 return urls;
253 }
254
255 @Override
256 protected String getChapterContent(URL source, InputStream in, int number,
257 Progress pg) throws IOException {
258 String staticSite = "https://e-hentai.org/s/";
259 List<URL> pages = new ArrayList<URL>();
260
261 @SuppressWarnings("resource")
262 Scanner scan = new Scanner(in, "UTF-8");
263 scan.useDelimiter("\"");
264 while (scan.hasNext()) {
265 String line = scan.next();
266 if (line.startsWith(staticSite)) {
267 try {
268 pages.add(new URL(line));
269 } catch (MalformedURLException e) {
270 Instance.syserr(new IOException(
271 "Parsing error, a link is not correctly parsed: "
272 + line, e));
273 }
274 }
275 }
276
277 if (pg == null) {
278 pg = new Progress();
279 }
280 pg.setMinMax(0, pages.size());
281 pg.setProgress(0);
282
283 StringBuilder builder = new StringBuilder();
284
285 for (URL page : pages) {
286 InputStream pageIn = Instance.getCache().open(page, this, false);
287 try {
288 String link = getKeyLine(pageIn, "id=\"img\"", "src=\"", "\"");
289 if (link != null && !link.isEmpty()) {
290 builder.append("[");
291 builder.append(link);
292 builder.append("]<br/>");
293 }
294 pg.add(1);
295 } finally {
296 if (pageIn != null) {
297 pageIn.close();
298 }
299 }
300 }
301
302 pg.done();
303 return builder.toString();
304 }
305}