improve perf for getCover
[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / YiffStar.java
1 package be.nikiroo.fanfix.supported;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.MalformedURLException;
6 import java.net.URL;
7 import java.util.AbstractMap;
8 import java.util.ArrayList;
9 import java.util.HashMap;
10 import java.util.List;
11 import java.util.Map;
12 import java.util.Map.Entry;
13 import java.util.Scanner;
14
15 import be.nikiroo.fanfix.Instance;
16 import be.nikiroo.fanfix.bundles.Config;
17 import be.nikiroo.fanfix.data.MetaData;
18 import be.nikiroo.utils.Image;
19 import be.nikiroo.utils.Progress;
20 import be.nikiroo.utils.StringUtils;
21
22 /**
23 * Support class for <a href="https://sofurry.com/">SoFurry.com</a>, a Furry
24 * website supporting images and stories (we only retrieve the stories).
25 *
26 * @author niki
27 */
28 class YiffStar extends BasicSupport_Deprecated {
29
30 @Override
31 public String getSourceName() {
32 return "YiffStar";
33 }
34
35 @Override
36 protected MetaData getMeta(URL source, InputStream in) throws IOException {
37 MetaData meta = new MetaData();
38
39 meta.setTitle(getTitle(reset(in)));
40 meta.setAuthor(getAuthor(reset(in)));
41 meta.setDate("");
42 meta.setTags(getTags(reset(in)));
43 meta.setSource(getSourceName());
44 meta.setUrl(source.toString());
45 meta.setPublisher(getSourceName());
46 meta.setUuid(source.toString());
47 meta.setLuid("");
48 meta.setLang("en");
49 meta.setSubject("Furry");
50 meta.setType(getType().toString());
51 meta.setImageDocument(false);
52 meta.setCover(getCover(source, reset(in)));
53
54 return meta;
55 }
56
57 @Override
58 protected boolean supports(URL url) {
59 String host = url.getHost();
60 if (host.startsWith("www.")) {
61 host = host.substring("www.".length());
62 }
63
64 return "sofurry.com".equals(host);
65 }
66
67 @Override
68 protected boolean isHtml() {
69 return true;
70 }
71
72 @Override
73 public void login() throws IOException {
74 // Note: this should not be necessary anymore
75 // (the "/guest" trick is enough)
76 String login = Instance.getConfig().getString(
77 Config.LOGIN_YIFFSTAR_USER);
78 String password = Instance.getConfig().getString(
79 Config.LOGIN_YIFFSTAR_PASS);
80
81 if (login != null && !login.isEmpty() && password != null
82 && !password.isEmpty()) {
83 Map<String, String> post = new HashMap<String, String>();
84 post.put("sfLoginUsername", login);
85 post.put("sfLoginPassword", password);
86 post.put("YII_CSRF_TOKEN", "");
87
88 // Cookies will actually be retained by the cache manager once
89 // logged in
90 Instance.getCache()
91 .openNoCache(new URL("https://www.sofurry.com/user/login"),
92 this, post, null, null).close();
93 }
94 }
95
96 @Override
97 public URL getCanonicalUrl(URL source) {
98 try {
99 if (source.getPath().startsWith("/view")) {
100 source = new URL(source.toString() + "/guest");
101 InputStream in = Instance.getCache().open(source, this, false);
102 String line = getLine(in, "/browse/folder/", 0);
103 if (line != null) {
104 String[] tab = line.split("\"");
105 if (tab.length > 1) {
106 String groupUrl = source.getProtocol() + "://"
107 + source.getHost() + tab[1];
108 return guest(groupUrl);
109 }
110 }
111 }
112 } catch (Exception e) {
113 Instance.getTraceHandler().error(e);
114 }
115
116 return super.getCanonicalUrl(source);
117 }
118
119 private List<String> getTags(InputStream in) {
120 List<String> tags = new ArrayList<String>();
121
122 String line = getLine(in, "class=\"sf-story-big-tags", 0);
123 if (line != null) {
124 String[] tab = StringUtils.unhtml(line).split(",");
125 for (String possibleTag : tab) {
126 String tag = possibleTag.trim();
127 if (!tag.isEmpty() && !tag.equals("...") && !tags.contains(tag)) {
128 tags.add(tag);
129 }
130 }
131 }
132
133 return tags;
134 }
135
136 private Image getCover(URL source, InputStream in) throws IOException {
137
138 List<Entry<String, URL>> chaps = getChapters(source, in, null);
139 if (!chaps.isEmpty()) {
140 in = Instance.getCache().open(chaps.get(0).getValue(), this, true);
141 String line = getLine(in, " name=\"og:image\"", 0);
142 if (line != null) {
143 int pos = -1;
144 for (int i = 0; i < 3; i++) {
145 pos = line.indexOf('"', pos + 1);
146 }
147
148 if (pos >= 0) {
149 line = line.substring(pos + 1);
150 pos = line.indexOf('"');
151 if (pos >= 0) {
152 line = line.substring(0, pos);
153 if (line.contains("/thumb?")) {
154 line = line.replace("/thumb?",
155 "/auxiliaryContent?type=25&");
156 return getImage(this, null, line);
157 }
158 }
159 }
160 }
161 }
162
163 return null;
164 }
165
166 private String getAuthor(InputStream in) {
167 String author = getLine(in, "class=\"onlinestatus", 0);
168 if (author != null) {
169 return StringUtils.unhtml(author).trim();
170 }
171
172 return null;
173 }
174
175 private String getTitle(InputStream in) {
176 String title = getLine(in, "class=\"sflabel pagetitle", 0);
177 if (title != null) {
178 if (title.contains("(series)")) {
179 title = title.replace("(series)", "");
180 }
181 return StringUtils.unhtml(title).trim();
182 }
183
184 return null;
185 }
186
187 @Override
188 protected String getDesc(URL source, InputStream in) throws IOException {
189 return null; // TODO: no description at all? Cannot find one...
190 }
191
192 @Override
193 protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
194 Progress pg) throws IOException {
195 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
196
197 @SuppressWarnings("resource")
198 Scanner scan = new Scanner(in, "UTF-8");
199 scan.useDelimiter("\\n");
200 while (scan.hasNext()) {
201 String line = scan.next();
202 if (line.contains("\"/view/") && line.contains("title=")) {
203 String[] tab = line.split("\"");
204 if (tab.length > 5) {
205 String link = tab[5];
206 if (link.startsWith("/")) {
207 link = source.getProtocol() + "://" + source.getHost()
208 + link;
209 }
210 urls.add(new AbstractMap.SimpleEntry<String, URL>(
211 StringUtils.unhtml(line).trim(), guest(link)));
212 }
213 }
214 }
215
216 return urls;
217 }
218
219 @Override
220 protected String getChapterContent(URL source, InputStream in, int number,
221 Progress pg) throws IOException {
222 StringBuilder builder = new StringBuilder();
223
224 String startAt = "id=\"sfContentBody";
225 String endAt = "id=\"recommendationArea";
226 boolean ok = false;
227
228 @SuppressWarnings("resource")
229 Scanner scan = new Scanner(in, "UTF-8");
230 scan.useDelimiter("\\n");
231 while (scan.hasNext()) {
232 String line = scan.next();
233 if (!ok && line.contains(startAt)) {
234 ok = true;
235 } else if (ok && line.contains(endAt)) {
236 ok = false;
237 break;
238 }
239
240 if (ok) {
241 builder.append(line);
242 builder.append(' ');
243 }
244 }
245
246 return builder.toString();
247 }
248
249 /**
250 * Return a {@link URL} from the given link, but add the "/guest" part to it
251 * to make sure we don't need to be logged-in to see it.
252 *
253 * @param link
254 * the link
255 *
256 * @return the {@link URL}
257 *
258 * @throws MalformedURLException
259 * in case of data error
260 */
261 private URL guest(String link) throws MalformedURLException {
262 if (link.contains("?")) {
263 if (link.contains("/?")) {
264 return new URL(link.replace("?", "guest?"));
265 }
266
267 return new URL(link.replace("?", "/guest?"));
268 }
269
270 return new URL(link + "/guest");
271 }
272 }