Merge branch 'master' of github.com:nikiroo/gofetch
[gofetch.git] / src / be / nikiroo / gofetch / support / TheRegister.java
CommitLineData
d28c4aac
NR
1package be.nikiroo.gofetch.support;
2
3import java.io.IOException;
4import java.io.InputStream;
5import java.net.URL;
3e62b034 6import java.util.AbstractMap;
d28c4aac 7import java.util.ArrayList;
3e62b034 8import java.util.HashMap;
d28c4aac 9import java.util.List;
3e62b034
NR
10import java.util.Map;
11import java.util.Map.Entry;
d28c4aac
NR
12
13import org.jsoup.helper.DataUtil;
14import org.jsoup.nodes.Document;
15import org.jsoup.nodes.Element;
16import org.jsoup.nodes.Node;
d28c4aac
NR
17
18import be.nikiroo.gofetch.data.Comment;
19import be.nikiroo.gofetch.data.Story;
d28c4aac 20
b34d1f35
NR
21/**
22 * Support <a
23 * href="https://www.theregister.co.uk/">https://www.theregister.co.uk/</a>.
24 *
25 * @author niki
26 */
d28c4aac 27public class TheRegister extends BasicSupport {
3e62b034
NR
28 private Map<String, String> commentReplies = new HashMap<String, String>();
29
d28c4aac
NR
30 @Override
31 public String getDescription() {
32 return "The Register: Biting the hand that feeds IT";
33 }
34
35 @Override
3e62b034
NR
36 public void fetch(Story story) throws IOException {
37 super.fetch(story);
d28c4aac 38
3e62b034
NR
39 // Update comment replies
40 List<Comment> comments = new ArrayList<Comment>();
41 for (Comment comment : story.getComments()) {
42 if (commentReplies.containsKey(comment.getId())) {
43 String inReplyToId = commentReplies.get(comment.getId());
44 Comment inReplyTo = story.getCommentById(inReplyToId);
45 if (inReplyTo != null) {
46 inReplyTo.add(comment);
47 } else {
48 comments.add(comment);
49 }
50 } else {
51 comments.add(comment);
d28c4aac 52 }
3e62b034
NR
53 }
54 story.setComments(comments);
55 }
d28c4aac 56
3e62b034
NR
57 @Override
58 protected List<Entry<URL, String>> getUrls() throws IOException {
59 List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>();
60 urls.add(new AbstractMap.SimpleEntry<URL, String>(new URL(
61 "https://www.theregister.co.uk/"), ""));
62 return urls;
63 }
b34d1f35 64
3e62b034
NR
65 @Override
66 protected List<Element> getArticles(Document doc) {
67 return doc.getElementsByClass("story_link");
68 }
d28c4aac 69
3e62b034
NR
70 @Override
71 protected String getArticleId(Document doc, Element article) {
72 return "";
73 }
d28c4aac 74
3e62b034
NR
75 @Override
76 protected String getArticleTitle(Document doc, Element article) {
77 Element titleElement = article.getElementsByTag("h4").first();
78 if (titleElement != null) {
79 return titleElement.text();
80 }
d28c4aac 81
3e62b034
NR
82 return "";
83 }
84
85 @Override
86 protected String getArticleAuthor(Document doc, Element article) {
87 return "";
88 }
d28c4aac 89
3e62b034
NR
90 @Override
91 protected String getArticleDate(Document doc, Element article) {
92 Element dateElement = article.getElementsByClass("time_stamp").first();
93 if (dateElement != null) {
94 return dateElement.attr("data-epoch");
d28c4aac
NR
95 }
96
3e62b034 97 return "";
d28c4aac
NR
98 }
99
100 @Override
3e62b034
NR
101 protected String getArticleCategory(Document doc, Element article,
102 String currentCategory) {
103 Element categElement = article.previousElementSibling();
104 if (categElement != null) {
105 return categElement.text();
106 }
107
108 return "";
109 }
110
111 @Override
112 protected String getArticleDetails(Document doc, Element article) {
113 // We have some "details" but no content, so we switch them:
114 return "";
115 }
116
117 @Override
118 protected String getArticleIntUrl(Document doc, Element article) {
119 return article.absUrl("href");
120 }
121
122 @Override
123 protected String getArticleExtUrl(Document doc, Element article) {
124 return "";
125 }
126
127 @Override
128 protected String getArticleContent(Document doc, Element article) {
129 // We have some "details" but no content, so we switch them:
130 Element detailsElement = article.getElementsByClass("standfirst")
131 .first();
132 if (detailsElement != null) {
e818d449 133 return getArticleText(detailsElement);
3e62b034
NR
134 }
135
136 return "";
137 }
138
139 @Override
140 protected Element getFullArticle(Document doc) {
141 return doc.getElementById("body");
142 }
d28c4aac 143
3e62b034
NR
144 @Override
145 protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) {
146 List<Element> commentElements = new ArrayList<Element>();
147
148 // Get comments URL then parse it
d28c4aac 149 try {
3e62b034
NR
150 URL url = new URL("https://forums.theregister.co.uk/forum/1"
151 + intUrl.getPath());
a71d4075 152 InputStream in = open(url);
3e62b034
NR
153 try {
154 doc = DataUtil.load(in, "UTF-8", url.toString());
155 Element posts = doc.getElementById("forum_posts");
156 if (posts != null) {
157 for (Element post : posts.getElementsByClass("post")) {
158 commentElements.add(post);
159 Element inReplyTo = post.getElementsByClass(
160 "in-reply-to").first();
161 if (inReplyTo != null) {
162 String parentId = inReplyTo.absUrl("href");
163 if (parentId != null && parentId.contains("/")) {
164 int i = parentId.lastIndexOf('/');
165 parentId = parentId.substring(i + 1);
166
167 commentReplies
168 .put(getCommentId(post), parentId);
169 }
170 }
171 }
d28c4aac 172 }
3e62b034
NR
173 } finally {
174 in.close();
175 }
176 } catch (IOException e) {
177 }
178
179 return commentElements;
180 }
181
182 @Override
183 protected ElementProcessor getElementProcessorFullArticle() {
184 return new BasicElementProcessor();
185 }
186
187 @Override
188 protected List<Element> getCommentCommentPosts(Document doc,
189 Element container) {
190 return null;
191 }
d28c4aac 192
3e62b034
NR
193 @Override
194 protected String getCommentId(Element post) {
195 Element idE = post.getElementsByTag("a").first();
196 if (idE != null) {
197 String id = idE.attr("id");
198 if (id.startsWith("c_")) {
199 id = id.substring(2);
d28c4aac
NR
200 }
201
3e62b034
NR
202 return id;
203 }
7686553a 204
3e62b034
NR
205 return "";
206 }
d28c4aac 207
3e62b034
NR
208 @Override
209 protected String getCommentAuthor(Element post) {
210 Element author = post.getElementsByClass("author").first();
211 if (author != null) {
212 return author.text();
213 }
d28c4aac 214
3e62b034
NR
215 return "";
216 }
d28c4aac 217
3e62b034
NR
218 @Override
219 protected String getCommentTitle(Element post) {
220 Element title = post.getElementsByTag("h4").first();
221 if (title != null) {
222 return title.text();
223 }
d28c4aac 224
3e62b034
NR
225 return "";
226 }
227
228 @Override
229 protected String getCommentDate(Element post) {
230 Element id = post.getElementsByTag("a").first();
231 if (id != null) {
232 Element date = id.getElementsByTag("span").first();
233 if (date != null) {
234 return date.attr("data-epoch");
235 }
236 }
237
238 return "";
239 }
240
241 @Override
242 protected Element getCommentContentElement(Element post) {
243 return post.getElementsByClass("body").first();
244 }
7686553a 245
3e62b034
NR
246 @Override
247 protected ElementProcessor getElementProcessorComment() {
248 return new BasicElementProcessor() {
249 @Override
250 public boolean ignoreNode(Node node) {
251 // Remove the comment title (which has
252 // already been processed earlier)
253 if (node instanceof Element) {
254 Element el = (Element) node;
255 if ("h4".equals(el.tagName())) {
256 return true;
7686553a 257 }
d28c4aac 258 }
3e62b034
NR
259
260 return false;
d28c4aac 261 }
3e62b034 262 };
d28c4aac 263 }
d28c4aac 264}