057ed9f4172ced34a74129977b30054e98d210f9
1 package be
.nikiroo
.gofetch
.support
;
3 import java
.io
.IOException
;
5 import java
.util
.AbstractMap
;
6 import java
.util
.ArrayList
;
8 import java
.util
.Map
.Entry
;
10 import org
.jsoup
.nodes
.Document
;
11 import org
.jsoup
.nodes
.Element
;
12 import org
.jsoup
.nodes
.Node
;
13 import org
.jsoup
.select
.Elements
;
16 * Support <a href='https://slashdot.org/'>https://slashdot.org/</a>.
20 public class Slashdot
extends BasicSupport
{
/**
 * Returns the fixed, human-readable description of this support.
 *
 * @return the literal description string for Slashdot
 */
22 public String
getDescription() {
23 return "Slashdot: News for nerds, stuff that matters!";
/**
 * Builds the list of URL/String entries to fetch for this support.
 * <p>
 * The visible code creates an empty list and adds a single entry: the URL
 * <tt>https://slashdot.org/</tt> paired with an empty string.
 *
 * @throws IOException
 *             declared by the signature (the visible code constructs a
 *             {@link URL} from a string)
 */
27 protected List
<Entry
<URL
, String
>> getUrls() throws IOException
{
28 List
<Entry
<URL
, String
>> urls
= new ArrayList
<Entry
<URL
, String
>>();
// NOTE(review): the meaning of the String half of each entry (here "") is
// not shown in this excerpt -- confirm against the base class.
29 urls
.add(new AbstractMap
.SimpleEntry
<URL
, String
>(new URL(
30 "https://slashdot.org/"), ""));
// NOTE(review): the return statement is not visible in this excerpt.
/**
 * Lists the article elements of the page: every element of the document
 * whose tag name is <tt>header</tt>.
 *
 * @param doc
 *            the parsed page
 *
 * @return the <tt>header</tt> elements found in <tt>doc</tt>
 */
35 protected List
<Element
> getArticles(Document doc
) {
36 return doc
.getElementsByTag("header");
/**
 * Extracts the article ID from the <tt>id</tt> attribute of the first
 * element of class <tt>story-title</tt> found in the article, with a
 * leading <tt>title-</tt> prefix removed when present.
 *
 * @param doc
 *            the parsed page (not used by the visible code)
 * @param article
 *            the article element to inspect
 */
40 protected String
getArticleId(Document doc
, Element article
) {
41 Element title
= article
.getElementsByClass("story-title").first();
// NOTE(review): first() yields null when nothing matches; no null guard is
// visible before the attr() call below (a source line appears to be absent
// from this excerpt) -- confirm that one exists.
43 String id
= title
.attr("id");
// Keep only what follows the "title-" prefix.
44 if (id
.startsWith("title-")) {
45 id
= id
.substring("title-".length());
/**
 * Looks up the first element of class <tt>story-title</tt> inside the
 * article in order to derive the article title.
 * <p>
 * NOTE(review): only the lookup is visible in this excerpt; how the title
 * text is extracted and what is returned when the element is missing cannot
 * be told from here.
 *
 * @param doc
 *            the parsed page (not used by the visible code)
 * @param article
 *            the article element to inspect
 */
55 protected String
getArticleTitle(Document doc
, Element article
) {
56 Element title
= article
.getElementsByClass("story-title").first();
/**
 * Extracts the author from the article details text.
 * <p>
 * When the details start with <tt>"Posted by "</tt> and contain
 * <tt>" on "</tt>, the trimmed text between that prefix and the first
 * <tt>" on "</tt> is returned.
 *
 * @param doc
 *            the parsed page (not used by the visible code)
 * @param article
 *            the article element to inspect
 */
65 protected String
getArticleAuthor(Document doc
, Element article
) {
66 // details: "Posted by AUTHOR on DATE from the further-crackdown dept."
67 String details
= getArticleDetailsReal(article
);
// Position of the first " on ", taken as the end of the author name.
68 int pos
= details
.indexOf(" on ");
// NOTE(review): for details such as "Posted by on DATE" (empty author),
// pos is 9, smaller than the prefix length 10, and substring(10, 9) would
// throw -- confirm whether such input can occur.
69 if (details
.startsWith("Posted by ") && pos
>= 0) {
70 return details
.substring("Posted by ".length(), pos
).trim();
// NOTE(review): the value returned when the pattern does not match is not
// visible in this excerpt.
/**
 * Extracts the article date from the text of the first <tt>time</tt>
 * element, with a leading <tt>"on "</tt> removed when present.
 * <p>
 * Articles whose ID is empty are special-cased first (the action taken for
 * them is not visible in this excerpt).
 *
 * @param doc
 *            the parsed page, in which the <tt>time</tt> element is searched
 * @param article
 *            the article element, used here only to compute the article ID
 */
77 protected String
getArticleDate(Document doc
, Element article
) {
78 // Do not try bad articles
79 if (getArticleId(doc
, article
).isEmpty()) {
// NOTE(review): the lookup below runs on the whole document, not on the
// article element, so it yields the first <time> of the page -- confirm
// this is intended.
83 Element dateElement
= doc
.getElementsByTag("time").first();
84 if (dateElement
!= null) {
85 String date
= dateElement
.text().trim();
// Drop the leading "on " of the date text.
86 if (date
.startsWith("on ")) {
87 date
= date
.substring("on ".length());
/**
 * Returns the text of the first element of class <tt>topic</tt> when one is
 * found.
 *
 * @param doc
 *            the parsed page, in which the <tt>topic</tt> element is searched
 * @param article
 *            the article element (not used by the visible code)
 * @param currentCategory
 *            not used by the visible code
 */
97 protected String
getArticleCategory(Document doc
, Element article
,
98 String currentCategory
) {
// NOTE(review): searched in the whole document rather than in the article
// element -- confirm this is intended.
99 Element categElement
= doc
.getElementsByClass("topic").first();
100 if (categElement
!= null) {
101 return categElement
.text();
// NOTE(review): the value returned when no such element exists is not
// visible in this excerpt.
/**
 * Returns the trailing part of the article details text, starting at the
 * first <tt>" from the "</tt> and trimmed.
 *
 * @param doc
 *            the parsed page (not used by the visible code)
 * @param article
 *            the article element to inspect
 */
108 protected String
getArticleDetails(Document doc
, Element article
) {
109 // details: "Posted by AUTHOR on DATE from the further-crackdown dept."
110 String details
= getArticleDetailsReal(article
);
111 int pos
= details
.indexOf(" from the ");
// NOTE(review): substring(pos) throws when pos is -1; a source line appears
// to be absent from this excerpt just before the return, presumably the
// guard on pos -- confirm.
113 return details
.substring(pos
).trim();
/**
 * Returns the absolute <tt>href</tt> of the first <tt>a</tt> element found
 * under the article's first <tt>story-title</tt> element, when there is at
 * least one such link.
 *
 * @param doc
 *            the parsed page (not used by the visible code)
 * @param article
 *            the article element to inspect
 */
120 protected String
getArticleIntUrl(Document doc
, Element article
) {
121 Element title
= article
.getElementsByClass("story-title").first();
// NOTE(review): no null guard on title is visible before its use below (a
// source line appears to be absent from this excerpt) -- confirm.
123 Elements links
= title
.getElementsByTag("a");
124 if (links
.size() > 0) {
125 return links
.get(0).absUrl("href");
/**
 * Returns the absolute <tt>href</tt> of the second <tt>a</tt> element found
 * under the article's first <tt>story-title</tt> element, when there are at
 * least two such links.
 *
 * @param doc
 *            the parsed page (not used by the visible code)
 * @param article
 *            the article element to inspect
 */
132 protected String
getArticleExtUrl(Document doc
, Element article
) {
133 Element title
= article
.getElementsByClass("story-title").first();
// NOTE(review): no null guard on title is visible before its use below (a
// source line appears to be absent from this excerpt) -- confirm.
135 Elements links
= title
.getElementsByTag("a");
// The first link is the one used for the internal URL; index 1 is used
// here.
136 if (links
.size() > 1) {
137 return links
.get(1).absUrl("href");
/**
 * Returns the article text taken from the document element whose ID is
 * <tt>"text-"</tt> followed by the article ID, when that element exists.
 * <p>
 * The text conversion is delegated to <tt>getArticleText</tt>, which is
 * defined outside this excerpt.
 *
 * @param doc
 *            the parsed page, in which the content element is searched
 * @param article
 *            the article element, used to compute the article ID
 */
144 protected String
getArticleContent(Document doc
, Element article
) {
145 Element contentElement
= doc
//
146 .getElementById("text-" + getArticleId(doc
, article
));
147 if (contentElement
!= null) {
148 return getArticleText(contentElement
);
// NOTE(review): the value returned when the element is missing is not
// visible in this excerpt.
/**
 * NOTE(review): only the signature is visible in this excerpt; the body,
 * and therefore which element (if any) is returned, cannot be told from
 * here.
 *
 * @param doc
 *            the parsed page
 */
155 protected Element
getFullArticle(Document doc
) {
/**
 * Collects the top-level comment elements of a full article page: the
 * direct children of the element with ID <tt>commentlisting</tt> that carry
 * the class <tt>comment</tt>.
 *
 * @param doc
 *            the parsed page
 * @param intUrl
 *            not used by the visible code
 *
 * @return the matching elements, in document order; the list stays empty
 *         when no <tt>commentlisting</tt> element exists
 */
160 protected List
<Element
> getFullArticleCommentPosts(Document doc
, URL intUrl
) {
161 List
<Element
> commentElements
= new ArrayList
<Element
>();
162 Element listing
= doc
.getElementById("commentlisting");
163 if (listing
!= null) {
// Only direct children are inspected, not deeper descendants.
164 for (Element commentElement
: listing
.children()) {
165 if (commentElement
.hasClass("comment")) {
166 commentElements
.add(commentElement
);
171 return commentElements
;
/**
 * Returns an anonymous {@link BasicElementProcessor} that defines its own
 * <tt>detectQuote</tt>.
 * <p>
 * The visible part of <tt>detectQuote</tt> tests whether the node is an
 * {@link Element} whose tag name is <tt>i</tt>.
 * <p>
 * NOTE(review): the outcome of that test (presumably "this is a quote") is
 * not visible in this excerpt -- confirm.
 */
175 protected ElementProcessor
getElementProcessorFullArticle() {
176 return new BasicElementProcessor() {
178 public boolean detectQuote(Node node
) {
179 if (node
instanceof Element
) {
180 Element element
= (Element
) node
;
181 if (element
.tagName().equals("i")) {
/**
 * Collects the replies nested under a comment container.
 * <p>
 * For every direct child of <tt>container</tt> whose ID contains
 * <tt>commtree_</tt>, the direct children of that child that carry the
 * class <tt>comment</tt> are added to the result.
 * <p>
 * NOTE(review): the end of the signature is absent from this excerpt;
 * <tt>container</tt> is presumably the second parameter -- confirm.
 *
 * @param doc
 *            the parsed page (not used by the visible code)
 *
 * @return the matching elements, in iteration order (possibly empty)
 */
191 protected List
<Element
> getCommentCommentPosts(Document doc
,
193 List
<Element
> commentElements
= new ArrayList
<Element
>();
194 for (Element child
: container
.children()) {
// Substring match on the ID, not an exact or prefix match.
195 if (child
.id().contains("commtree_")) {
196 for (Element sub
: child
.children()) {
197 if (sub
.hasClass("comment")) {
198 commentElements
.add(sub
);
204 return commentElements
;
/**
 * Computes the ID of a comment post; posts carrying the class
 * <tt>hidden</tt> are special-cased first.
 * <p>
 * NOTE(review): neither the handling of hidden posts nor the normal result
 * is visible in this excerpt.
 *
 * @param post
 *            the comment element
 */
208 protected String
getCommentId(Element post
) {
209 if (post
.hasClass("hidden")) {
/**
 * Returns the text of the first element of class <tt>by</tt> inside the
 * post, when one exists; posts carrying the class <tt>hidden</tt> are
 * special-cased first.
 * <p>
 * NOTE(review): the handling of hidden posts and the value returned when no
 * <tt>by</tt> element exists are not visible in this excerpt.
 *
 * @param post
 *            the comment element
 */
217 protected String
getCommentAuthor(Element post
) {
218 if (post
.hasClass("hidden")) {
222 Element author
= post
.getElementsByClass("by").first();
223 if (author
!= null) {
224 return author
.text();
/**
 * Looks up the first element of class <tt>title</tt> inside the post in
 * order to derive the comment title; posts carrying the class
 * <tt>hidden</tt> are special-cased first.
 * <p>
 * NOTE(review): the handling of hidden posts and the way the title element
 * is turned into the result are not visible in this excerpt.
 *
 * @param post
 *            the comment element
 */
231 protected String
getCommentTitle(Element post
) {
232 if (post
.hasClass("hidden")) {
236 Element title
= post
.getElementsByClass("title").first();
/**
 * Looks up the first element of class <tt>otherdetails</tt> inside the post
 * in order to derive the comment date; posts carrying the class
 * <tt>hidden</tt> are special-cased first.
 * <p>
 * NOTE(review): the handling of hidden posts and the way the date is
 * extracted from that element are not visible in this excerpt.
 *
 * @param post
 *            the comment element
 */
245 protected String
getCommentDate(Element post
) {
246 if (post
.hasClass("hidden")) {
250 Element date
= post
.getElementsByClass("otherdetails").first();
/**
 * Returns the first element of class <tt>commentBody</tt> inside the post;
 * posts carrying the class <tt>hidden</tt> are special-cased first (that
 * handling is not visible in this excerpt).
 *
 * @param post
 *            the comment element
 *
 * @return the content element, or <tt>null</tt> when the post has no
 *         <tt>commentBody</tt> element
 */
259 protected Element
getCommentContentElement(Element post
) {
260 if (post
.hasClass("hidden")) {
264 return post
.getElementsByClass("commentBody").first();
/**
 * Returns an anonymous {@link BasicElementProcessor} used for comments; it
 * defines its own <tt>processText</tt> and <tt>detectQuote</tt>.
 */
268 protected ElementProcessor
getElementProcessorComment() {
269 return new BasicElementProcessor() {
// Strips every leading ">" from the text, trimming the remainder after each
// removal.
271 public String
processText(String text
) {
272 while (text
.startsWith(">")) { // comment in one-liners
273 text
= text
.substring(1).trim();
// The visible condition accepts an element that is a <blockquote>, or has
// the class "quote", or is a <p> with exactly one text node whose whole
// text satisfies a further test.
// NOTE(review): that further test and the value returned are cut off in
// this excerpt.
280 public boolean detectQuote(Node node
) {
281 if (node
instanceof Element
) {
282 Element elementNode
= (Element
) node
;
283 if (elementNode
.tagName().equals("blockquote")
284 || elementNode
.hasClass("quote")
285 || (elementNode
.tagName().equals("p")
286 && elementNode
.textNodes().size() == 1 && elementNode
287 .textNodes().get(0).getWholeText()
/**
 * Returns the text of the first element of class <tt>details</tt> inside
 * the article, when one exists.
 * <p>
 * NOTE(review): the value returned when no such element exists is not
 * visible in this excerpt; callers in this class invoke <tt>indexOf</tt> and
 * <tt>startsWith</tt> on the result without a null check, so it is
 * presumably a non-null string -- confirm.
 *
 * @param article
 *            the article element to inspect
 */
298 private String
getArticleDetailsReal(Element article
) {
299 Element detailsElement
= article
.getElementsByClass("details").first();
300 if (detailsElement
!= null) {
301 return detailsElement
.text();