Update from master
[nikiroo-utils.git] / supported / YiffStar.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.MalformedURLException;
6 import java.net.URL;
7 import java.util.AbstractMap;
8 import java.util.ArrayList;
9 import java.util.HashMap;
10 import java.util.List;
11 import java.util.Map;
12 import java.util.Map.Entry;
13 import java.util.Scanner;
14
15 import be.nikiroo.fanfix.Instance;
16 import be.nikiroo.fanfix.bundles.Config;
17 import be.nikiroo.fanfix.data.MetaData;
18 import be.nikiroo.utils.Image;
19 import be.nikiroo.utils.Progress;
20 import be.nikiroo.utils.StringUtils;
21
22 /**
23 * Support class for <a href="https://sofurry.com/">SoFurry.com</a>, a Furry
24 * website supporting images and stories (we only retrieve the stories).
25 *
26 * @author niki
27 */
28 class YiffStar extends BasicSupport_Deprecated {
29 @Override
30 protected MetaData getMeta(URL source, InputStream in) throws IOException {
31 MetaData meta = new MetaData();
32
33 meta.setTitle(getTitle(reset(in)));
34 meta.setAuthor(getAuthor(reset(in)));
35 meta.setDate("");
36 meta.setTags(getTags(reset(in)));
37 meta.setSource(getType().getSourceName());
38 meta.setUrl(source.toString());
39 meta.setPublisher(getType().getSourceName());
40 meta.setUuid(source.toString());
41 meta.setLuid("");
42 meta.setLang("en");
43 meta.setSubject("Furry");
44 meta.setType(getType().toString());
45 meta.setImageDocument(false);
46 meta.setCover(getCover(source, reset(in)));
47
48 return meta;
49 }
50
51 @Override
52 protected boolean supports(URL url) {
53 String host = url.getHost();
54 if (host.startsWith("www.")) {
55 host = host.substring("www.".length());
56 }
57
58 return "sofurry.com".equals(host);
59 }
60
61 @Override
62 protected boolean isHtml() {
63 return true;
64 }
65
66 @Override
67 public void login() throws IOException {
68 // Note: this should not be necessary anymore
69 // (the "/guest" trick is enough)
70 String login = Instance.getInstance().getConfig().getString(Config.LOGIN_YIFFSTAR_USER);
71 String password = Instance.getInstance().getConfig().getString(Config.LOGIN_YIFFSTAR_PASS);
72
73 if (login != null && !login.isEmpty() && password != null
74 && !password.isEmpty()) {
75
76 Map<String, String> post = new HashMap<String, String>();
77 post.put("LoginForm[sfLoginUsername]", login);
78 post.put("LoginForm[sfLoginPassword]", password);
79 post.put("YII_CSRF_TOKEN", "");
80 post.put("yt1", "Login");
81 post.put("returnUrl", "/");
82
83 // Cookies will actually be retained by the cache manager once
84 // logged in
85 Instance.getInstance().getCache()
86 .openNoCache(new URL("https://www.sofurry.com/user/login"), this, post, null, null).close();
87 }
88 }
89
90 @Override
91 public URL getCanonicalUrl(URL source) {
92 try {
93 if (source.getPath().startsWith("/view")) {
94 source = guest(source.toString());
95 // NO CACHE because we don't want the NotLoggedIn message later
96 InputStream in = Instance.getInstance().getCache().openNoCache(source, this, null, null, null);
97 String line = getLine(in, "/browse/folder/", 0);
98 if (line != null) {
99 String[] tab = line.split("\"");
100 if (tab.length > 1) {
101 String groupUrl = source.getProtocol() + "://"
102 + source.getHost() + tab[1];
103 return guest(groupUrl);
104 }
105 }
106 }
107 } catch (Exception e) {
108 Instance.getInstance().getTraceHandler().error(e);
109 }
110
111 return super.getCanonicalUrl(source);
112 }
113
114 private List<String> getTags(InputStream in) {
115 List<String> tags = new ArrayList<String>();
116
117 String line = getLine(in, "class=\"sf-story-big-tags", 0);
118 if (line != null) {
119 String[] tab = StringUtils.unhtml(line).split(",");
120 for (String possibleTag : tab) {
121 String tag = possibleTag.trim();
122 if (!tag.isEmpty() && !tag.equals("...") && !tags.contains(tag)) {
123 tags.add(tag);
124 }
125 }
126 }
127
128 return tags;
129 }
130
131 private Image getCover(URL source, InputStream in) throws IOException {
132
133 List<Entry<String, URL>> chaps = getChapters(source, in, null);
134 if (!chaps.isEmpty()) {
135 in = Instance.getInstance().getCache().open(chaps.get(0).getValue(), this, true);
136 String line = getLine(in, " name=\"og:image\"", 0);
137 if (line != null) {
138 int pos = -1;
139 for (int i = 0; i < 3; i++) {
140 pos = line.indexOf('"', pos + 1);
141 }
142
143 if (pos >= 0) {
144 line = line.substring(pos + 1);
145 pos = line.indexOf('"');
146 if (pos >= 0) {
147 line = line.substring(0, pos);
148 if (line.contains("/thumb?")) {
149 line = line.replace("/thumb?",
150 "/auxiliaryContent?type=25&");
151 return getImage(this, null, line);
152 }
153 }
154 }
155 }
156 }
157
158 return null;
159 }
160
161 private String getAuthor(InputStream in) {
162 String author = getLine(in, "class=\"onlinestatus", 0);
163 if (author != null) {
164 return StringUtils.unhtml(author).trim();
165 }
166
167 return null;
168 }
169
170 private String getTitle(InputStream in) {
171 String title = getLine(in, "class=\"sflabel pagetitle", 0);
172 if (title != null) {
173 if (title.contains("(series)")) {
174 title = title.replace("(series)", "");
175 }
176 return StringUtils.unhtml(title).trim();
177 }
178
179 return null;
180 }
181
182 @Override
183 protected String getDesc(URL source, InputStream in) throws IOException {
184 return null; // TODO: no description at all? Cannot find one...
185 }
186
187 @Override
188 protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
189 Progress pg) throws IOException {
190 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
191
192 @SuppressWarnings("resource")
193 Scanner scan = new Scanner(in, "UTF-8");
194 scan.useDelimiter("\\n");
195 while (scan.hasNext()) {
196 String line = scan.next();
197 if (line.contains("\"/view/") && line.contains("title=")) {
198 String[] tab = line.split("\"");
199 if (tab.length > 5) {
200 String link = tab[5];
201 if (link.startsWith("/")) {
202 link = source.getProtocol() + "://" + source.getHost()
203 + link;
204 }
205 urls.add(new AbstractMap.SimpleEntry<String, URL>(
206 StringUtils.unhtml(line).trim(), guest(link)));
207 }
208 }
209 }
210
211 return urls;
212 }
213
214 @Override
215 protected String getChapterContent(URL source, InputStream in, int number,
216 Progress pg) throws IOException {
217 StringBuilder builder = new StringBuilder();
218
219 String startAt = "id=\"sfContentBody";
220 String endAt = "id=\"recommendationArea";
221 boolean ok = false;
222
223 @SuppressWarnings("resource")
224 Scanner scan = new Scanner(in, "UTF-8");
225 scan.useDelimiter("\\n");
226 while (scan.hasNext()) {
227 String line = scan.next();
228 if (!ok && line.contains(startAt)) {
229 ok = true;
230 } else if (ok && line.contains(endAt)) {
231 ok = false;
232 break;
233 }
234
235 if (ok) {
236 builder.append(line);
237 builder.append(' ');
238 }
239 }
240
241 return builder.toString();
242 }
243
244 /**
245 * Return a {@link URL} from the given link, but add the "/guest" part to it
246 * to make sure we don't need to be logged-in to see it.
247 *
248 * @param link
249 * the link
250 *
251 * @return the {@link URL}
252 *
253 * @throws MalformedURLException
254 * in case of data error
255 */
256 private URL guest(String link) throws MalformedURLException {
257 if (link.contains("?")) {
258 if (link.contains("/?")) {
259 return new URL(link.replace("?", "guest?"));
260 }
261
262 return new URL(link.replace("?", "/guest?"));
263 }
264
265 return new URL(link + "/guest");
266 }
267 }