6fb51a6dd32cb5f0bd37692165560ca1fdb915b9
1 package be
.nikiroo
.gofetch
.support
;
3 import java
.io
.IOException
;
5 import java
.util
.AbstractMap
;
6 import java
.util
.ArrayList
;
8 import java
.util
.Map
.Entry
;
10 import org
.jsoup
.nodes
.Document
;
11 import org
.jsoup
.nodes
.Element
;
12 import org
.jsoup
.nodes
.Node
;
13 import org
.jsoup
.select
.Elements
;
16 * Support <a href='https://slashdot.org/'>https://slashdot.org/</a>.
20 public class Slashdot
extends BasicSupport
{
22 public String
getDescription() {
23 return "Slashdot: News for nerds, stuff that matters!";
27 protected List
<Entry
<URL
, String
>> getUrls() throws IOException
{
28 List
<Entry
<URL
, String
>> urls
= new ArrayList
<Entry
<URL
, String
>>();
29 urls
.add(new AbstractMap
.SimpleEntry
<URL
, String
>(new URL(
30 "https://slashdot.org/"), ""));
35 protected List
<Element
> getArticles(Document doc
) {
36 return doc
.getElementsByTag("header");
40 protected String
getArticleId(Document doc
, Element article
) {
41 Element title
= article
.getElementsByClass("story-title").first();
43 String id
= title
.attr("id");
44 if (id
.startsWith("title-")) {
45 id
= id
.substring("title-".length());
48 while (id
.length() < 10) {
59 protected String
getArticleTitle(Document doc
, Element article
) {
60 Element title
= article
.getElementsByClass("story-title").first();
69 protected String
getArticleAuthor(Document doc
, Element article
) {
70 // details: "Posted by AUTHOR on DATE from the further-crackdown dept."
71 String details
= getArticleDetailsReal(article
);
72 int pos
= details
.indexOf(" on ");
73 if (details
.startsWith("Posted by ") && pos
>= 0) {
74 return details
.substring("Posted by ".length(), pos
).trim();
81 protected String
getArticleDate(Document doc
, Element article
) {
82 // Do not try bad articles
83 if (getArticleId(doc
, article
).isEmpty()) {
87 Element dateElement
= doc
.getElementsByTag("time").first();
88 if (dateElement
!= null) {
89 String date
= dateElement
.text().trim();
90 if (date
.startsWith("on ")) {
91 date
= date
.substring("on ".length());
101 protected String
getArticleCategory(Document doc
, Element article
,
102 String currentCategory
) {
103 Element categElement
= doc
.getElementsByClass("topic").first();
104 if (categElement
!= null) {
105 return categElement
.text();
112 protected String
getArticleDetails(Document doc
, Element article
) {
113 // details: "Posted by AUTHOR on DATE from the further-crackdown dept."
114 String details
= getArticleDetailsReal(article
);
115 int pos
= details
.indexOf(" from the ");
117 return details
.substring(pos
).trim();
124 protected String
getArticleIntUrl(Document doc
, Element article
) {
125 Element title
= article
.getElementsByClass("story-title").first();
127 Elements links
= title
.getElementsByTag("a");
128 if (links
.size() > 0) {
129 return links
.get(0).absUrl("href");
136 protected String
getArticleExtUrl(Document doc
, Element article
) {
137 Element title
= article
.getElementsByClass("story-title").first();
139 Elements links
= title
.getElementsByTag("a");
140 if (links
.size() > 1) {
141 return links
.get(1).absUrl("href");
148 protected String
getArticleContent(Document doc
, Element article
) {
149 Element contentElement
= doc
//
150 .getElementById("text-" + getArticleId(doc
, article
));
151 if (contentElement
!= null) {
152 return contentElement
.text();
159 protected Element
getFullArticle(Document doc
) {
164 protected List
<Element
> getFullArticleCommentPosts(Document doc
, URL intUrl
) {
165 List
<Element
> commentElements
= new ArrayList
<Element
>();
166 Element listing
= doc
.getElementById("commentlisting");
167 if (listing
!= null) {
168 for (Element commentElement
: listing
.children()) {
169 if (commentElement
.hasClass("comment")) {
170 commentElements
.add(commentElement
);
175 return commentElements
;
179 protected ElementProcessor
getElementProcessorFullArticle() {
184 protected List
<Element
> getCommentCommentPosts(Document doc
,
186 List
<Element
> commentElements
= new ArrayList
<Element
>();
187 for (Element child
: container
.children()) {
188 if (child
.id().contains("commtree_")) {
189 for (Element sub
: child
.children()) {
190 if (sub
.hasClass("comment")) {
191 commentElements
.add(sub
);
197 return commentElements
;
201 protected String
getCommentId(Element post
) {
202 if (post
.hasClass("hidden")) {
210 protected String
getCommentAuthor(Element post
) {
211 if (post
.hasClass("hidden")) {
215 Element author
= post
.getElementsByClass("by").first();
216 if (author
!= null) {
217 return author
.text();
224 protected String
getCommentTitle(Element post
) {
225 if (post
.hasClass("hidden")) {
229 Element title
= post
.getElementsByClass("title").first();
238 protected String
getCommentDate(Element post
) {
239 if (post
.hasClass("hidden")) {
243 Element date
= post
.getElementsByClass("otherdetails").first();
252 protected Element
getCommentContentElement(Element post
) {
253 if (post
.hasClass("hidden")) {
257 return post
.getElementsByClass("commentBody").first();
261 protected ElementProcessor
getElementProcessorComment() {
262 return new BasicElementProcessor() {
264 public String
processText(String text
) {
265 while (text
.startsWith(">")) { // comment in one-liners
266 text
= text
.substring(1).trim();
273 public boolean detectQuote(Node node
) {
274 if (node
instanceof Element
) {
275 Element elementNode
= (Element
) node
;
276 if (elementNode
.tagName().equals("blockquote")
277 || elementNode
.hasClass("quote")
278 || (elementNode
.tagName().equals("p")
279 && elementNode
.textNodes().size() == 1 && elementNode
280 .textNodes().get(0).getWholeText()
291 private String
getArticleDetailsReal(Element article
) {
292 Element detailsElement
= article
.getElementsByClass("details").first();
293 if (detailsElement
!= null) {
294 return detailsElement
.text();