1 package be
.nikiroo
.fanfix
.supported
;
3 import java
.io
.IOException
;
4 import java
.io
.InputStream
;
5 import java
.io
.UnsupportedEncodingException
;
6 import java
.net
.MalformedURLException
;
8 import java
.net
.URLDecoder
;
9 import java
.net
.URLEncoder
;
10 import java
.util
.AbstractMap
;
11 import java
.util
.ArrayList
;
12 import java
.util
.Collection
;
13 import java
.util
.Collections
;
14 import java
.util
.LinkedList
;
15 import java
.util
.List
;
16 import java
.util
.AbstractMap
.SimpleEntry
;
17 import java
.util
.Map
.Entry
;
18 import java
.util
.Scanner
;
20 import be
.nikiroo
.fanfix
.Instance
;
21 import be
.nikiroo
.fanfix
.data
.Chapter
;
22 import be
.nikiroo
.fanfix
.data
.MetaData
;
23 import be
.nikiroo
.fanfix
.data
.Story
;
24 import be
.nikiroo
.utils
.Image
;
25 import be
.nikiroo
.utils
.Progress
;
26 import be
.nikiroo
.utils
.StringUtils
;
/**
 * Support class for <a href="http://e621.net/">e621.net</a> and
 * <a href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
 * including some of MLP.
 * <p>
 * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
 * comics, but it can be difficult to browse.
 */
38 class E621
extends BasicSupport_Deprecated
{
// Returns a String naming this source; getMeta() stores the result as both
// the story source and the publisher.
// NOTE(review): only the signature survives in this listing (the embedded
// line numbers jump from 40 to 45), so the returned value and the closing
// brace are not visible here -- restore them from the original file.
40 public String
getSourceName() {
45 protected MetaData
getMeta(URL source
, InputStream in
) throws IOException
{
46 MetaData meta
= new MetaData();
48 meta
.setTitle(getTitle(reset(in
)));
49 meta
.setAuthor(getAuthor(source
, reset(in
)));
51 meta
.setTags(getTags(source
, reset(in
), false));
52 meta
.setSource(getSourceName());
53 meta
.setUrl(source
.toString());
54 meta
.setPublisher(getSourceName());
55 meta
.setUuid(source
.toString());
58 meta
.setSubject("Furry");
59 meta
.setType(getType().toString());
60 meta
.setImageDocument(true);
61 meta
.setCover(getCover(source
, reset(in
)));
62 meta
.setFakeCover(true);
// Collects URL-decoded tag names for a search URL into a list of Strings.
// Only URLs for which isSearch(source) is true are scanned: the line located
// with getLine(in, "id=\"tag-sidebar\"", 1) is walked once per occurrence of
// href=" and every href containing `subkey` contributes one decoded tag.
// NOTE(review): this listing is incomplete. The declaration of `subkey`
// (original lines 79-83), the place where the `authors` flag is used, the
// end of the `tag` expression (86-87), the opening `try`, and everything
// after the catch comment (91-103, including the return) are missing.
67 private List
<String
> getTags(URL source
, InputStream in
, boolean authors
) {
68 List
<String
> tags
= new ArrayList
<String
>();
70 if (isSearch(source
)) {
71 String tagLine
= getLine(in
, "id=\"tag-sidebar\"", 1);
72 if (tagLine
!= null) {
73 String key
= "href=\"";
// Visit each href=" occurrence; the search resumes one character after
// the previous match.
74 for (int pos
= tagLine
.indexOf(key
); pos
>= 0; pos
= tagLine
75 .indexOf(key
, pos
+ 1)) {
// `end` is the index of the quote closing the attribute value.
76 int end
= tagLine
.indexOf("\"", pos
+ key
.length());
// NOTE(review): original line 77 is missing; `end` may be -1 here, which
// would make substring() throw -- presumably a guard stood on that line.
78 String href
= tagLine
.substring(pos
, end
);
84 if (href
.contains(subkey
)) {
85 String tag
= href
.substring(href
.indexOf(subkey
)
88 tags
.add(URLDecoder
.decode(tag
, "UTF-8"));
89 } catch (UnsupportedEncodingException e
) {
90 // supported JVMs must have UTF-8 support
104 public Story
process(URL url
, Progress pg
) throws IOException
{
105 // There is no chapters on e621, just pagination...
106 Story story
= super.process(url
, pg
);
108 Chapter only
= new Chapter(1, null);
109 for (Chapter chap
: story
) {
110 only
.getParagraphs().addAll(chap
.getParagraphs());
113 story
.getChapters().clear();
114 story
.getChapters().add(only
);
120 protected boolean supports(URL url
) {
121 String host
= url
.getHost();
122 if (host
.startsWith("www.")) {
123 host
= host
.substring("www.".length());
126 return ("e621.net".equals(host
) || "e926.net".equals(host
))
127 && (isPool(url
) || isSearch(url
));
// Boolean flag method taking no arguments.
// NOTE(review): only the signature survives in this listing (the embedded
// line numbers jump from 131 to 135); the returned value and the closing
// brace are not visible here -- restore them from the original file.
131 protected boolean isHtml() {
135 private Image
getCover(URL source
, InputStream in
) throws IOException
{
136 URL urlForCover
= source
;
137 if (isPool(source
)) {
138 urlForCover
= new URL(source
.toString() + "?page=1");
141 String images
= getChapterContent(urlForCover
, in
, 1, null);
142 if (!images
.isEmpty()) {
143 int pos
= images
.indexOf("<br/>");
145 images
= images
.substring(1, pos
- 1);
146 return getImage(this, null, images
);
// Determines the author string for the given URL.
// Search URLs: joins the values of getTags(source, in, true) with ", ".
// Other URLs: takes the href of the line containing href="/post/show/,
// opens that page on the same protocol/host through the cache, and returns
// the un-HTML-ed link text found on its class="tag-type-artist" line.
// NOTE(review): this listing is incomplete. Several closing braces, the
// lines between the index lookups and the substring calls (original lines
// 169, 172, 184, 187 -- presumably guards), the opening `try` (176), and
// the catch body plus the final return (197-205) are missing.
153 private String
getAuthor(URL source
, InputStream in
) {
154 if (isSearch(source
)) {
155 StringBuilder builder
= new StringBuilder();
156 for (String author
: getTags(source
, in
, true)) {
// Separator goes before every element but the first.
157 if (builder
.length() > 0)
158 builder
.append(", ");
159 builder
.append(author
);
162 return builder
.toString();
// Non-search case: start from the first line that links to a post.
165 String author
= getLine(in
, "href=\"/post/show/", 0);
166 if (author
!= null) {
167 String key
= "href=\"";
168 int pos
= author
.indexOf(key
);
// Keep what follows href=" ...
170 author
= author
.substring(pos
+ key
.length());
171 pos
= author
.indexOf("\"");
// ... up to the closing quote, minus the character right before it.
173 author
= author
.substring(0, pos
- 1);
// `author` now holds a path; rebuild an absolute URL on the same host.
174 String page
= source
.getProtocol() + "://"
175 + source
.getHost() + author
;
177 InputStream pageIn
= Instance
.getCache().open(
178 new URL(page
), this, false);
// NOTE(review): no close() of pageIn is visible in this listing; check
// the original file for one.
180 key
= "class=\"tag-type-artist\"";
181 author
= getLine(pageIn
, key
, 0);
182 if (author
!= null) {
183 pos
= author
.indexOf("<a href=\"");
185 author
= author
.substring(pos
);
186 pos
= author
.indexOf("</a>");
188 author
= author
.substring(0, pos
);
// The remaining text still starts with the opening <a ...> tag;
// StringUtils.unhtml() is applied to it before returning.
189 return StringUtils
.unhtml(author
);
196 } catch (Exception e
) {
206 private String
getTitle(InputStream in
) {
207 String title
= getLine(in
, "<title>", 0);
209 int pos
= title
.indexOf('>');
211 title
= title
.substring(pos
+ 1);
212 pos
= title
.indexOf('<');
214 title
= title
.substring(0, pos
);
218 if (title
.startsWith("Pool:")) {
219 title
= title
.substring("Pool:".length());
222 title
= StringUtils
.unhtml(title
).trim();
// Builds the description from the line containing "margin-bottom: 2em;",
// walking it character by character while tracking whether the current
// position is inside an HTML tag, and returns the trimmed result.
// NOTE(review): this listing is incomplete. Original lines 231-232 (possibly
// a null check on `desc`, which is dereferenced below) and the whole loop
// body (238-245: what happens to `inTags` and what gets appended to
// `builder`) are missing, as are the closing braces.
229 protected String
getDesc(URL source
, InputStream in
) throws IOException
{
230 String desc
= getLine(in
, "margin-bottom: 2em;", 0);
233 StringBuilder builder
= new StringBuilder();
235 boolean inTags
= false;
236 for (char car
: desc
.toCharArray()) {
// True exactly on a tag boundary: '>' while inside a tag, or '<' while
// outside of one.
237 if ((inTags
&& car
== '>') || (!inTags
&& car
== '<')) {
246 return builder
.toString().trim();
253 protected List
<Entry
<String
, URL
>> getChapters(URL source
, InputStream in
,
254 Progress pg
) throws IOException
{
255 if (isPool(source
)) {
256 return getChaptersPool(source
, in
, pg
);
257 } else if (isSearch(source
)) {
258 return getChaptersSearch(source
, in
, pg
);
261 return new LinkedList
<Entry
<String
, URL
>>();
// Lists the result pages of a search as ("Page <i>", URL) entries.
// The search terms are the last segment of the source path (a trailing "/"
// is ignored). Pages are requested one by one from
// https://e621.net/post/index/<i>/<search>/ (host switched to e926 when the
// source host contains "e926"), and the collected list is reversed at the
// end.
// NOTE(review): this listing is incomplete. The statement controlled by the
// "No posts matched your search." test (original line 289), the opening
// `try` (285), the catch body (296-299) and the final return (303) are
// missing, so how the `for (...; true; ...)` loop terminates is not visible
// here. No close() of pageI is visible either.
264 private List
<Entry
<String
, URL
>> getChaptersSearch(URL source
,
265 InputStream in
, Progress pg
) throws IOException
{
266 List
<Entry
<String
, URL
>> urls
= new ArrayList
<Entry
<String
, URL
>>();
268 String search
= source
.getPath();
269 if (search
.endsWith("/")) {
270 search
= search
.substring(0, search
.length() - 1);
// Keep only what follows the last '/'.
273 int pos
= search
.lastIndexOf('/');
275 search
= search
.substring(pos
+ 1);
278 String baseUrl
= "https://e621.net/post/index/";
279 if (source
.getHost().contains("e926")) {
280 baseUrl
= baseUrl
.replace("e621", "e926");
// No upper bound in the loop header: pages are probed from 1 upwards.
283 for (int i
= 1; true; i
++) {
284 URL url
= new URL(baseUrl
+ i
+ "/" + search
+ "/");
286 InputStream pageI
= Instance
.getCache().open(url
, this, false);
288 if (getLine(pageI
, "No posts matched your search.", 0) != null)
290 urls
.add(new AbstractMap
.SimpleEntry
<String
, URL
>("Page "
291 + Integer
.toString(i
), url
));
295 } catch (Exception e
) {
300 // They are sorted in reverse order on the website
301 Collections
.reverse(urls
);
// Lists the pages of a pool as ("<i>", URL) entries, one per page from 1 to
// `last`, each URL being the source URL followed by "?page=<i>".
// The page is read line by line; for every occurrence of the source path on
// a line, the text between the next '=' and the next '"' is parsed as a
// page number.
// NOTE(review): this listing is incomplete. The opening `try` (original
// line 321), the statements that use `num` (323-325 -- presumably updating
// `last`, which is otherwise never changed in the visible code), the catch
// body (327-331) and the final return (335-339) are missing.
305 private List
<Entry
<String
, URL
>> getChaptersPool(URL source
,
306 InputStream in
, Progress pg
) throws IOException
{
307 List
<Entry
<String
, URL
>> urls
= new ArrayList
<Entry
<String
, URL
>>();
308 int last
= 1; // no pool/show when only one page
// The scanner is not closed in the visible code (hence the suppressed
// "resource" warning); it wraps the caller's stream.
310 @SuppressWarnings("resource")
311 Scanner scan
= new Scanner(in
, "UTF-8");
312 scan
.useDelimiter("\\n");
313 while (scan
.hasNext()) {
314 String line
= scan
.next();
// Visit every occurrence of the source path on this line, resuming right
// after the previous match.
315 for (int pos
= line
.indexOf(source
.getPath()); pos
>= 0; pos
= line
316 .indexOf(source
.getPath(), pos
+ source
.getPath().length())) {
317 int equalPos
= line
.indexOf("=", pos
);
318 int quotePos
= line
.indexOf("\"", pos
);
// Only accept "...=<value>\"" with the '=' before the quote.
319 if (equalPos
>= 0 && quotePos
> equalPos
) {
320 String snum
= line
.substring(equalPos
+ 1, quotePos
);
322 int num
= Integer
.parseInt(snum
);
326 } catch (NumberFormatException e
) {
332 for (int i
= 1; i
<= last
; i
++) {
333 urls
.add(new AbstractMap
.SimpleEntry
<String
, URL
>(Integer
334 .toString(i
), new URL(source
.toString() + "?page=" + i
)));
// Builds the content of one page as a String made of image entries, each
// terminated by "]<br/>".
// Lines containing class="preview are scanned for URLs starting with
// <staticSite>/data/preview/ (staticSite is https://static1.e621.net, with
// e621 replaced by e926 when the source host contains "e926"); each match is
// rewritten to <staticSite>/data/<rest> and cut at its last '.'.
// NOTE(review): this listing is incomplete. The lines between the index
// lookups and the substring calls (original lines 360, 365 -- presumably
// guards), the appends that precede "]<br/>" (367-368) and all closing
// braces are missing. `number` and `pg` are not used in the visible code.
341 protected String
getChapterContent(URL source
, InputStream in
, int number
,
342 Progress pg
) throws IOException
{
343 StringBuilder builder
= new StringBuilder();
344 String staticSite
= "https://static1.e621.net";
345 if (source
.getHost().contains("e926")) {
346 staticSite
= staticSite
.replace("e621", "e926");
349 String key
= staticSite
+ "/data/preview/";
// The scanner is not closed in the visible code (hence the suppressed
// "resource" warning); it wraps the caller's stream.
351 @SuppressWarnings("resource")
352 Scanner scan
= new Scanner(in
, "UTF-8");
353 scan
.useDelimiter("\\n");
354 while (scan
.hasNext()) {
355 String line
= scan
.next();
356 if (line
.contains("class=\"preview")) {
// Visit every preview URL on this line, resuming right after the previous
// match.
357 for (int pos
= line
.indexOf(key
); pos
>= 0; pos
= line
.indexOf(
358 key
, pos
+ key
.length())) {
359 int endPos
= line
.indexOf("\"", pos
);
// Part of the URL after the preview prefix, up to the closing quote.
361 String id
= line
.substring(pos
+ key
.length(), endPos
);
362 id
= staticSite
+ "/data/" + id
;
// Cut at the last '.', i.e. drop the file extension.
364 int dotPos
= id
.lastIndexOf(".");
366 id
= id
.substring(0, dotPos
);
369 builder
.append("]<br/>");
376 return builder
.toString();
380 protected URL
getCanonicalUrl(URL source
) {
381 if (isSearch(source
)) {
382 // /post?tags=tag1+tag2 -> ../post/index/1/tag1%32tag2
383 String key
= "post?tags=";
384 if (source
.toString().contains(key
)) {
385 int pos
= source
.toString().indexOf(key
);
386 String tags
= source
.toString().substring(pos
+ key
.length());
387 tags
= tags
.replace("+", "%32");
389 return new URL(source
.toString().substring(0, pos
)
390 + "post/index/1/" + tags
);
391 } catch (MalformedURLException e
) {
392 Instance
.getTraceHandler().error(e
);
396 return super.getCanonicalUrl(source
);
399 private boolean isPool(URL url
) {
400 return url
.getPath().startsWith("/pool/");
403 private boolean isSearch(URL url
) {
404 return url
.getPath().startsWith("/post/index/")
405 || (url
.getPath().equals("/post") && url
.getQuery().startsWith(