1 package be
.nikiroo
.fanfix
.supported
;
3 import java
.io
.IOException
;
4 import java
.io
.InputStream
;
5 import java
.io
.UnsupportedEncodingException
;
6 import java
.net
.MalformedURLException
;
8 import java
.net
.URLDecoder
;
9 import java
.util
.AbstractMap
;
10 import java
.util
.ArrayList
;
11 import java
.util
.Collections
;
12 import java
.util
.LinkedList
;
13 import java
.util
.List
;
14 import java
.util
.Map
.Entry
;
15 import java
.util
.Scanner
;
17 import be
.nikiroo
.fanfix
.Instance
;
18 import be
.nikiroo
.fanfix
.data
.Chapter
;
19 import be
.nikiroo
.fanfix
.data
.MetaData
;
20 import be
.nikiroo
.fanfix
.data
.Story
;
21 import be
.nikiroo
.utils
.Image
;
22 import be
.nikiroo
.utils
.Progress
;
23 import be
.nikiroo
.utils
.StringUtils
;
26 * Support class for <a href="http://e621.net/">e621.net</a> and <a
27 * href="http://e926.net/">e926.net</a>, a Furry website supporting comics,
28 * including some of MLP.
30 * <a href="http://e926.net/">e926.net</a> only shows the "clean" images and
31 * comics, but it can be difficult to browse.
35 class E621
extends BasicSupport_Deprecated
{
37 protected MetaData
getMeta(URL source
, InputStream in
) throws IOException
{
38 MetaData meta
= new MetaData();
40 meta
.setTitle(getTitle(reset(in
)));
41 meta
.setAuthor(getAuthor(source
, reset(in
)));
43 meta
.setTags(getTags(source
, reset(in
), false));
44 meta
.setSource(getType().getSourceName());
45 meta
.setUrl(source
.toString());
46 meta
.setPublisher(getType().getSourceName());
47 meta
.setUuid(source
.toString());
50 meta
.setSubject("Furry");
51 meta
.setType(getType().toString());
52 meta
.setImageDocument(true);
53 meta
.setCover(getCover(source
, reset(in
)));
54 meta
.setFakeCover(true);
59 private List
<String
> getTags(URL source
, InputStream in
, boolean authors
) {
60 List
<String
> tags
= new ArrayList
<String
>();
62 if (isSearch(source
)) {
63 String tagLine
= getLine(in
, "id=\"tag-sidebar\"", 1);
64 if (tagLine
!= null) {
65 String key
= "href=\"";
66 for (int pos
= tagLine
.indexOf(key
); pos
>= 0; pos
= tagLine
67 .indexOf(key
, pos
+ 1)) {
68 int end
= tagLine
.indexOf("\"", pos
+ key
.length());
70 String href
= tagLine
.substring(pos
, end
);
76 if (href
.contains(subkey
)) {
77 String tag
= href
.substring(href
.indexOf(subkey
)
80 tags
.add(URLDecoder
.decode(tag
, "UTF-8"));
81 } catch (UnsupportedEncodingException e
) {
82 // supported JVMs must have UTF-8 support
96 public Story
process(URL url
, Progress pg
) throws IOException
{
97 // There is no chapters on e621, just pagination...
98 Story story
= super.process(url
, pg
);
100 Chapter only
= new Chapter(1, null);
101 for (Chapter chap
: story
) {
102 only
.getParagraphs().addAll(chap
.getParagraphs());
105 story
.getChapters().clear();
106 story
.getChapters().add(only
);
112 protected boolean supports(URL url
) {
113 String host
= url
.getHost();
114 if (host
.startsWith("www.")) {
115 host
= host
.substring("www.".length());
118 return ("e621.net".equals(host
) || "e926.net".equals(host
))
119 && (isPool(url
) || isSearch(url
));
123 protected boolean isHtml() {
127 private Image
getCover(URL source
, InputStream in
) throws IOException
{
128 URL urlForCover
= source
;
129 if (isPool(source
)) {
130 urlForCover
= new URL(source
.toString() + "?page=1");
133 String images
= getChapterContent(urlForCover
, in
, 1, null);
134 if (!images
.isEmpty()) {
135 int pos
= images
.indexOf("<br/>");
137 images
= images
.substring(1, pos
- 1);
138 return getImage(this, null, images
);
145 private String
getAuthor(URL source
, InputStream in
) {
146 if (isSearch(source
)) {
147 StringBuilder builder
= new StringBuilder();
148 for (String author
: getTags(source
, in
, true)) {
149 if (builder
.length() > 0)
150 builder
.append(", ");
151 builder
.append(author
);
154 return builder
.toString();
157 String author
= getLine(in
, "href=\"/post/show/", 0);
158 if (author
!= null) {
159 String key
= "href=\"";
160 int pos
= author
.indexOf(key
);
162 author
= author
.substring(pos
+ key
.length());
163 pos
= author
.indexOf("\"");
165 author
= author
.substring(0, pos
- 1);
166 String page
= source
.getProtocol() + "://"
167 + source
.getHost() + author
;
169 InputStream pageIn
= Instance
.getCache().open(
170 new URL(page
), this, false);
172 key
= "class=\"tag-type-artist\"";
173 author
= getLine(pageIn
, key
, 0);
174 if (author
!= null) {
175 pos
= author
.indexOf("<a href=\"");
177 author
= author
.substring(pos
);
178 pos
= author
.indexOf("</a>");
180 author
= author
.substring(0, pos
);
181 return StringUtils
.unhtml(author
);
188 } catch (Exception e
) {
198 private String
getTitle(InputStream in
) {
199 String title
= getLine(in
, "<title>", 0);
201 int pos
= title
.indexOf('>');
203 title
= title
.substring(pos
+ 1);
204 pos
= title
.indexOf('<');
206 title
= title
.substring(0, pos
);
210 if (title
.startsWith("Pool:")) {
211 title
= title
.substring("Pool:".length());
214 title
= StringUtils
.unhtml(title
).trim();
221 protected String
getDesc(URL source
, InputStream in
) throws IOException
{
222 String desc
= getLine(in
, "margin-bottom: 2em;", 0);
225 StringBuilder builder
= new StringBuilder();
227 boolean inTags
= false;
228 for (char car
: desc
.toCharArray()) {
229 if ((inTags
&& car
== '>') || (!inTags
&& car
== '<')) {
238 return builder
.toString().trim();
245 protected List
<Entry
<String
, URL
>> getChapters(URL source
, InputStream in
,
246 Progress pg
) throws IOException
{
247 if (isPool(source
)) {
248 return getChaptersPool(source
, in
, pg
);
249 } else if (isSearch(source
)) {
250 return getChaptersSearch(source
, in
, pg
);
253 return new LinkedList
<Entry
<String
, URL
>>();
256 private List
<Entry
<String
, URL
>> getChaptersSearch(URL source
,
257 InputStream in
, Progress pg
) throws IOException
{
258 List
<Entry
<String
, URL
>> urls
= new ArrayList
<Entry
<String
, URL
>>();
260 String search
= source
.getPath();
261 if (search
.endsWith("/")) {
262 search
= search
.substring(0, search
.length() - 1);
265 int pos
= search
.lastIndexOf('/');
267 search
= search
.substring(pos
+ 1);
270 String baseUrl
= "https://e621.net/post/index/";
271 if (source
.getHost().contains("e926")) {
272 baseUrl
= baseUrl
.replace("e621", "e926");
275 for (int i
= 1; true; i
++) {
276 URL url
= new URL(baseUrl
+ i
+ "/" + search
+ "/");
278 InputStream pageI
= Instance
.getCache().open(url
, this, false);
280 if (getLine(pageI
, "No posts matched your search.", 0) != null)
282 urls
.add(new AbstractMap
.SimpleEntry
<String
, URL
>("Page "
283 + Integer
.toString(i
), url
));
287 } catch (Exception e
) {
292 // They are sorted in reverse order on the website
293 Collections
.reverse(urls
);
297 private List
<Entry
<String
, URL
>> getChaptersPool(URL source
,
298 InputStream in
, Progress pg
) throws IOException
{
299 List
<Entry
<String
, URL
>> urls
= new ArrayList
<Entry
<String
, URL
>>();
300 int last
= 1; // no pool/show when only one page
302 @SuppressWarnings("resource")
303 Scanner scan
= new Scanner(in
, "UTF-8");
304 scan
.useDelimiter("\\n");
305 while (scan
.hasNext()) {
306 String line
= scan
.next();
307 for (int pos
= line
.indexOf(source
.getPath()); pos
>= 0; pos
= line
308 .indexOf(source
.getPath(), pos
+ source
.getPath().length())) {
309 int equalPos
= line
.indexOf("=", pos
);
310 int quotePos
= line
.indexOf("\"", pos
);
311 if (equalPos
>= 0 && quotePos
> equalPos
) {
312 String snum
= line
.substring(equalPos
+ 1, quotePos
);
314 int num
= Integer
.parseInt(snum
);
318 } catch (NumberFormatException e
) {
324 for (int i
= 1; i
<= last
; i
++) {
325 urls
.add(new AbstractMap
.SimpleEntry
<String
, URL
>(Integer
326 .toString(i
), new URL(source
.toString() + "?page=" + i
)));
333 protected String
getChapterContent(URL source
, InputStream in
, int number
,
334 Progress pg
) throws IOException
{
335 StringBuilder builder
= new StringBuilder();
336 String staticSite
= "https://static1.e621.net";
337 if (source
.getHost().contains("e926")) {
338 staticSite
= staticSite
.replace("e621", "e926");
341 String key
= staticSite
+ "/data/preview/";
343 @SuppressWarnings("resource")
344 Scanner scan
= new Scanner(in
, "UTF-8");
345 scan
.useDelimiter("\\n");
346 while (scan
.hasNext()) {
347 String line
= scan
.next();
348 if (line
.contains("class=\"preview")) {
349 for (int pos
= line
.indexOf(key
); pos
>= 0; pos
= line
.indexOf(
350 key
, pos
+ key
.length())) {
351 int endPos
= line
.indexOf("\"", pos
);
353 String id
= line
.substring(pos
+ key
.length(), endPos
);
354 id
= staticSite
+ "/data/" + id
;
356 int dotPos
= id
.lastIndexOf(".");
358 id
= id
.substring(0, dotPos
);
361 builder
.append("]<br/>");
368 return builder
.toString();
372 protected URL
getCanonicalUrl(URL source
) {
373 if (isSearch(source
)) {
// /post/search?tags=tag1+tag2 -> ../post/index/1/tag1%20tag2
375 String key
= "?tags=";
376 if (source
.toString().contains(key
)) {
377 int pos
= source
.toString().indexOf(key
);
378 String tags
= source
.toString().substring(pos
+ key
.length());
379 tags
= tags
.replace("+", "%20");
381 String base
= source
.toString().substring(0, pos
);
382 if (!base
.endsWith("/")) {
385 if (base
.endsWith("/search/")) {
386 base
= base
.substring(0, base
.indexOf("/search/") + 1);
390 return new URL(base
+ "index/1/" + tags
);
391 } catch (MalformedURLException e
) {
392 Instance
.getTraceHandler().error(e
);
397 return super.getCanonicalUrl(source
);
400 private boolean isPool(URL url
) {
401 return url
.getPath().startsWith("/pool/");
404 private boolean isSearch(URL url
) {
405 return url
.getPath().startsWith("/post/index/")
406 || (url
.getPath().equals("/post/search") && url
.getQuery()
407 .startsWith("tags="));