Commit | Line | Data |
---|---|---|
b19b3632 NR |
1 | package be.nikiroo.gofetch.support; |
2 | ||
3 | import be.nikiroo.gofetch.data.Story; | |
4 | import be.nikiroo.gofetch.data.Comment; | |
5 | ||
6 | import java.io.IOException; | |
7 | import java.io.UnsupportedEncodingException; | |
8 | import java.net.URL; | |
9 | import java.net.URLDecoder; | |
10 | import java.util.AbstractMap; | |
11 | import java.util.ArrayList; | |
12 | import java.util.List; | |
13 | import java.util.LinkedList; | |
14 | import java.util.Map.Entry; | |
15 | import java.util.Map; | |
16 | import java.util.HashMap; | |
17 | import java.util.Date; | |
18 | ||
19 | import org.jsoup.nodes.Document; | |
20 | import org.jsoup.nodes.Element; | |
21 | import org.jsoup.nodes.Node; | |
22 | import org.jsoup.select.Elements; | |
23 | ||
24 | /** | |
25 | * Support <a href="https://www.reddit.com/">https://www.reddit.com/</a>. | |
26 | * | |
27 | * @author niki | |
28 | */ | |
29 | public class Reddit extends BasicSupport { | |
30 | @Override | |
31 | public String getDescription() { | |
32 | return "Reddit: The front page of the internet"; | |
33 | } | |
34 | ||
35 | @Override | |
36 | protected List<Entry<URL, String>> getUrls() throws IOException { | |
37 | List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>(); | |
38 | String base = "https://www.reddit.com/r/"; | |
39 | urls.add(new AbstractMap.SimpleEntry<URL, String>( | |
40 | new URL(base + "linux_gaming" + "/new/"), "linux_gaming" | |
41 | )); | |
42 | ||
43 | return urls; | |
44 | } | |
45 | ||
46 | @Override | |
47 | protected List<Element> getArticles(Document doc) { | |
48 | return doc.getElementsByClass("thing"); | |
49 | } | |
50 | ||
51 | @Override | |
52 | protected String getArticleId(Document doc, Element article) { | |
53 | // Use the date, Luke | |
54 | return ""; | |
55 | } | |
56 | ||
57 | @Override | |
58 | protected String getArticleTitle(Document doc, Element article) { | |
59 | return article.getElementsByAttributeValue( | |
60 | "data-event-action", "title").first().text().trim(); | |
61 | } | |
62 | ||
63 | @Override | |
64 | protected String getArticleAuthor(Document doc, Element article) { | |
65 | return article.getElementsByAttributeValueStarting( | |
66 | "href", "/user/" | |
67 | ).text().trim(); | |
68 | } | |
69 | ||
70 | @Override | |
71 | protected String getArticleDate(Document doc, Element article) { | |
72 | return article.getElementsByClass("live-timestamp") | |
73 | .attr("datetime").trim(); | |
74 | } | |
75 | ||
76 | @Override | |
77 | protected String getArticleCategory(Document doc, Element article, | |
78 | String currentCategory) { | |
79 | Elements categEls = article.getElementsByAttributeValueStarting( | |
80 | "href", "/r/" + currentCategory + "/search=?q=flair_name" | |
81 | ); | |
82 | ||
83 | if (categEls.size() > 0) { | |
84 | return currentCategory + ", " | |
85 | + categEls.first().text().trim(); | |
86 | } | |
87 | ||
88 | return currentCategory; | |
89 | } | |
90 | ||
91 | @Override | |
92 | protected String getArticleDetails(Document doc, Element article) { | |
93 | return ""; | |
94 | } | |
95 | ||
96 | @Override | |
97 | protected String getArticleIntUrl(Document doc, Element article) { | |
98 | return article.getElementsByClass("thing").first() | |
99 | .absUrl("data-permalink"); | |
100 | } | |
101 | ||
102 | @Override | |
103 | protected String getArticleExtUrl(Document doc, Element article) { | |
104 | Element url = article.getElementsByAttributeValue( | |
105 | "data-event-action", "title").first(); | |
106 | if (!url.attr("href").trim().startsWith("/")) { | |
107 | return url.absUrl("href"); | |
108 | } | |
109 | ||
110 | return ""; | |
111 | } | |
112 | ||
113 | @Override | |
114 | protected String getArticleContent(Document doc, Element article) { | |
115 | return ""; | |
116 | } | |
117 | ||
118 | @Override | |
119 | protected Element getFullArticle(Document doc) { | |
120 | return doc.getElementsByClass("ckueCN").first(); | |
121 | } | |
122 | ||
123 | @Override | |
124 | protected ElementProcessor getElementProcessorFullArticle() { | |
125 | return new BasicElementProcessor(); | |
126 | } | |
127 | ||
128 | @Override | |
129 | protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) { | |
130 | return doc.getElementsByClass("jHfOJm"); | |
131 | } | |
132 | ||
133 | @Override | |
134 | protected List<Element> getCommentCommentPosts(Document doc, | |
135 | Element container) { | |
136 | List<Element> elements = new LinkedList<Element>(); | |
137 | for (Element el : container.children()) { | |
138 | elements.addAll(el.getElementsByClass("jHfOJm")); | |
139 | } | |
140 | ||
141 | return elements; | |
142 | } | |
143 | ||
144 | @Override | |
145 | protected String getCommentId(Element post) { | |
146 | int level = 1; | |
147 | Elements els = post.getElementsByClass("imyGpC"); | |
148 | if (els.size() > 0) { | |
149 | String l = els.first().text().trim() | |
150 | .replace("level ", ""); | |
151 | try { | |
152 | level = Integer.parseInt(l); | |
153 | } catch(NumberFormatException e) { | |
154 | } | |
155 | } | |
156 | ||
157 | return Integer.toString(level); | |
158 | } | |
159 | ||
160 | @Override | |
161 | protected String getCommentAuthor(Element post) { | |
162 | // Since we have no title, we switch with author | |
163 | return ""; | |
164 | } | |
165 | ||
166 | @Override | |
167 | protected String getCommentTitle(Element post) { | |
168 | // Since we have no title, we switch with author | |
169 | Elements els = post.getElementsByClass("RVnoX"); | |
170 | if (els.size() > 0) { | |
171 | return els.first().text().trim(); | |
172 | } | |
173 | ||
174 | els = post.getElementsByClass("kzePTH"); | |
175 | if (els.size() > 0) { | |
176 | return els.first().text().trim(); | |
177 | } | |
178 | ||
179 | return ""; | |
180 | } | |
181 | ||
182 | @Override | |
183 | protected String getCommentDate(Element post) { | |
184 | return post.getElementsByClass("hJDlLH") | |
185 | .first().text().trim(); | |
186 | } | |
187 | ||
188 | @Override | |
189 | protected Element getCommentContentElement(Element post) { | |
190 | return post.getElementsByClass("ckueCN") | |
191 | .first(); | |
192 | } | |
193 | ||
194 | @Override | |
195 | protected ElementProcessor getElementProcessorComment() { | |
196 | return new BasicElementProcessor(); | |
197 | } | |
198 | ||
199 | @Override | |
200 | public void fetch(Story story) throws IOException { | |
201 | super.fetch(story); | |
202 | ||
203 | List<Comment> comments = new LinkedList<Comment>(); | |
204 | Map<Integer, Comment> lastOfLevel = | |
205 | new HashMap<Integer, Comment>(); | |
206 | ||
207 | for (Comment c : story.getComments()) { | |
208 | int level = Integer.parseInt(c.getId()); | |
209 | lastOfLevel.put(level, c); | |
210 | if (level <= 1) { | |
211 | comments.add(c); | |
212 | } else { | |
213 | Comment parent = lastOfLevel.get(level - 1); | |
214 | if (parent != null ){ | |
215 | parent.add(c); | |
216 | } else { | |
217 | // bad data | |
218 | comments.add(c); | |
219 | } | |
220 | } | |
221 | } | |
222 | ||
223 | story.setComments(comments); | |
224 | } | |
225 | } |