Update conversion tests and test files
[nikiroo-utils.git] / src / be / nikiroo / fanfix / supported / YiffStar.java
CommitLineData
a4143cd7
NR
1package be.nikiroo.fanfix.supported;
2
a4143cd7
NR
3import java.io.IOException;
4import java.io.InputStream;
13285ff8 5import java.net.MalformedURLException;
a4143cd7
NR
6import java.net.URL;
7import java.util.ArrayList;
6e06d2cc 8import java.util.HashMap;
a4143cd7
NR
9import java.util.List;
10import java.util.Map;
11import java.util.Map.Entry;
12import java.util.Scanner;
13
14import be.nikiroo.fanfix.Instance;
6e06d2cc 15import be.nikiroo.fanfix.bundles.Config;
a4143cd7 16import be.nikiroo.fanfix.data.MetaData;
16a81ef7 17import be.nikiroo.utils.Image;
ed08c171 18import be.nikiroo.utils.Progress;
a4143cd7
NR
19import be.nikiroo.utils.StringUtils;
20
/**
 * Support class for <a href="https://sofurry.com/">SoFurry.com</a>, a Furry
 * website supporting images and stories (we only retrieve the stories).
 *
 * @author niki
 */
0ffa4754 27class YiffStar extends BasicSupport_Deprecated {
a4143cd7
NR
28
29 @Override
30 public String getSourceName() {
31 return "YiffStar";
32 }
33
34 @Override
35 protected MetaData getMeta(URL source, InputStream in) throws IOException {
36 MetaData meta = new MetaData();
37
38 meta.setTitle(getTitle(reset(in)));
211f7ddb 39 meta.setAuthor(getAuthor(reset(in)));
a4143cd7
NR
40 meta.setDate("");
41 meta.setTags(getTags(reset(in)));
42 meta.setSource(getSourceName());
43 meta.setUrl(source.toString());
44 meta.setPublisher(getSourceName());
45 meta.setUuid(source.toString());
46 meta.setLuid("");
47 meta.setLang("EN");
48 meta.setSubject("Furry");
49 meta.setType(getType().toString());
50 meta.setImageDocument(false);
51 meta.setCover(getCover(source, reset(in)));
52
53 return meta;
54 }
55
56 @Override
57 protected boolean supports(URL url) {
58 String host = url.getHost();
59 if (host.startsWith("www.")) {
60 host = host.substring("www.".length());
61 }
62
63 return "sofurry.com".equals(host);
64 }
65
66 @Override
67 protected boolean isHtml() {
68 return true;
69 }
70
71 @Override
6e06d2cc 72 public void login() throws IOException {
13285ff8
NR
73 // Note: this should not be necessary anymore
74 // (the "/guest" trick is enough)
75 String login = Instance.getConfig().getString(
76 Config.LOGIN_YIFFSTAR_USER);
77 String password = Instance.getConfig().getString(
78 Config.LOGIN_YIFFSTAR_PASS);
79
80 if (login != null && !login.isEmpty() && password != null
81 && !password.isEmpty()) {
82 Map<String, String> post = new HashMap<String, String>();
83 post.put("sfLoginUsername", login);
84 post.put("sfLoginPassword", password);
85 post.put("YII_CSRF_TOKEN", "");
86
87 // Cookies will actually be retained by the cache manager once
88 // logged in
89 Instance.getCache()
90 .openNoCache(new URL("https://www.sofurry.com/user/login"),
315f14ae 91 this, post, null, null).close();
13285ff8 92 }
a4143cd7
NR
93 }
94
95 @Override
0ffa4754
NR
96 public URL getCanonicalUrl(URL source) {
97 try {
98 if (source.getPath().startsWith("/view")) {
99 source = new URL(source.toString() + "/guest");
100 InputStream in = Instance.getCache().open(source, this, false);
101 String line = getLine(in, "/browse/folder/", 0);
102 if (line != null) {
103 String[] tab = line.split("\"");
104 if (tab.length > 1) {
105 String groupUrl = source.getProtocol() + "://"
106 + source.getHost() + tab[1];
107 return guest(groupUrl);
108 }
6e06d2cc 109 }
a4143cd7 110 }
0ffa4754
NR
111 } catch (Exception e) {
112 Instance.getTraceHandler().error(e);
a4143cd7
NR
113 }
114
115 return super.getCanonicalUrl(source);
116 }
117
118 private List<String> getTags(InputStream in) {
119 List<String> tags = new ArrayList<String>();
120
121 String line = getLine(in, "class=\"sf-story-big-tags", 0);
122 if (line != null) {
123 String[] tab = StringUtils.unhtml(line).split(",");
124 for (String possibleTag : tab) {
125 String tag = possibleTag.trim();
126 if (!tag.isEmpty() && !tag.equals("...") && !tags.contains(tag)) {
127 tags.add(tag);
128 }
129 }
130 }
131
132 return tags;
133 }
134
16a81ef7 135 private Image getCover(URL source, InputStream in) throws IOException {
a4143cd7 136
ed08c171 137 List<Entry<String, URL>> chaps = getChapters(source, in, null);
a4143cd7
NR
138 if (!chaps.isEmpty()) {
139 in = Instance.getCache().open(chaps.get(0).getValue(), this, true);
140 String line = getLine(in, " name=\"og:image\"", 0);
141 if (line != null) {
142 int pos = -1;
143 for (int i = 0; i < 3; i++) {
144 pos = line.indexOf('"', pos + 1);
145 }
146
147 if (pos >= 0) {
148 line = line.substring(pos + 1);
149 pos = line.indexOf('"');
150 if (pos >= 0) {
151 line = line.substring(0, pos);
152 if (line.contains("/thumb?")) {
153 line = line.replace("/thumb?",
154 "/auxiliaryContent?type=25&");
155 return getImage(this, null, line);
156 }
157 }
158 }
159 }
160 }
161
162 return null;
163 }
164
211f7ddb 165 private String getAuthor(InputStream in) {
a4143cd7
NR
166 String author = getLine(in, "class=\"onlinestatus", 0);
167 if (author != null) {
168 return StringUtils.unhtml(author).trim();
169 }
170
171 return null;
172 }
173
211f7ddb 174 private String getTitle(InputStream in) {
a4143cd7
NR
175 String title = getLine(in, "class=\"sflabel pagetitle", 0);
176 if (title != null) {
177 if (title.contains("(series)")) {
178 title = title.replace("(series)", "");
179 }
180 return StringUtils.unhtml(title).trim();
181 }
182
183 return null;
184 }
185
186 @Override
187 protected String getDesc(URL source, InputStream in) throws IOException {
188 return null; // TODO: no description at all? Cannot find one...
189 }
190
191 @Override
ed08c171
NR
192 protected List<Entry<String, URL>> getChapters(URL source, InputStream in,
193 Progress pg) throws IOException {
a4143cd7
NR
194 List<Entry<String, URL>> urls = new ArrayList<Entry<String, URL>>();
195
196 @SuppressWarnings("resource")
197 Scanner scan = new Scanner(in, "UTF-8");
198 scan.useDelimiter("\\n");
199 while (scan.hasNext()) {
200 String line = scan.next();
201 if (line.contains("\"/view/") && line.contains("title=")) {
202 String[] tab = line.split("\"");
203 if (tab.length > 5) {
204 String link = tab[5];
205 if (link.startsWith("/")) {
206 link = source.getProtocol() + "://" + source.getHost()
207 + link;
208 }
13285ff8 209 final URL value = guest(link);
a4143cd7
NR
210 final String key = StringUtils.unhtml(line).trim();
211 urls.add(new Entry<String, URL>() {
211f7ddb 212 @Override
a4143cd7
NR
213 public URL setValue(URL value) {
214 return null;
215 }
216
211f7ddb 217 @Override
a4143cd7
NR
218 public URL getValue() {
219 return value;
220 }
221
211f7ddb 222 @Override
a4143cd7
NR
223 public String getKey() {
224 return key;
225 }
226 });
227 }
228 }
229 }
230
231 return urls;
232 }
233
234 @Override
ed08c171
NR
235 protected String getChapterContent(URL source, InputStream in, int number,
236 Progress pg) throws IOException {
a4143cd7
NR
237 StringBuilder builder = new StringBuilder();
238
239 String startAt = "id=\"sfContentBody";
240 String endAt = "id=\"recommendationArea";
241 boolean ok = false;
242
243 @SuppressWarnings("resource")
244 Scanner scan = new Scanner(in, "UTF-8");
245 scan.useDelimiter("\\n");
246 while (scan.hasNext()) {
247 String line = scan.next();
248 if (!ok && line.contains(startAt)) {
249 ok = true;
250 } else if (ok && line.contains(endAt)) {
251 ok = false;
252 break;
253 }
254
255 if (ok) {
256 builder.append(line);
406447a4 257 builder.append(' ');
a4143cd7
NR
258 }
259 }
260
261 return builder.toString();
262 }
13285ff8
NR
263
264 /**
265 * Return a {@link URL} from the given link, but add the "/guest" part to it
266 * to make sure we don't need to be logged-in to see it.
267 *
268 * @param link
269 * the link
270 *
271 * @return the {@link URL}
272 *
273 * @throws MalformedURLException
0efd25e3 274 * in case of data error
13285ff8
NR
275 */
276 private URL guest(String link) throws MalformedURLException {
277 if (link.contains("?")) {
278 if (link.contains("/?")) {
279 return new URL(link.replace("?", "guest?"));
13285ff8 280 }
211f7ddb
NR
281
282 return new URL(link.replace("?", "/guest?"));
13285ff8 283 }
211f7ddb
NR
284
285 return new URL(link + "/guest");
13285ff8 286 }
a4143cd7 287}