git://git.nikiroo.be
/
gofetch.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
21f1a99
)
Fix Reddit changing IDs
author
Niki Roo
<niki@nikiroo.be>
Wed, 26 Dec 2018 12:08:13 +0000
(13:08 +0100)
committer
Niki Roo
<niki@nikiroo.be>
Wed, 26 Dec 2018 12:08:13 +0000
(13:08 +0100)
src/be/nikiroo/gofetch/support/Reddit.java
patch
|
blob
|
blame
|
history
diff --git
a/src/be/nikiroo/gofetch/support/Reddit.java
b/src/be/nikiroo/gofetch/support/Reddit.java
index 2732894a4fe77168b21ccaeda8cb92d562382a95..f5ae131474c83ab32c050cbbf1a5ce9dc1fd7cd0 100644
(file)
--- a/
src/be/nikiroo/gofetch/support/Reddit.java
+++ b/
src/be/nikiroo/gofetch/support/Reddit.java
@@
-1,27
+1,24
@@
package be.nikiroo.gofetch.support;
package be.nikiroo.gofetch.support;
-import be.nikiroo.gofetch.data.Story;
-import be.nikiroo.gofetch.data.Comment;
-
import java.io.IOException;
import java.io.IOException;
-import java.io.UnsupportedEncodingException;
import java.net.URL;
import java.net.URL;
-import java.
net.URLDecoder
;
+import java.
text.SimpleDateFormat
;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.AbstractMap;
import java.util.ArrayList;
-import java.util.List;
+import java.util.Date;
+import java.util.HashMap;
import java.util.LinkedList;
import java.util.LinkedList;
-import java.util.
Map.Entry
;
+import java.util.
List
;
import java.util.Map;
import java.util.Map;
-import java.util.HashMap;
-import java.util.Date;
-import java.text.SimpleDateFormat;
+import java.util.Map.Entry;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
-import org.jsoup.nodes.Node;
import org.jsoup.select.Elements;
import org.jsoup.select.Elements;
+import be.nikiroo.gofetch.data.Comment;
+import be.nikiroo.gofetch.data.Story;
+
/**
* Support <a href="https://www.reddit.com/">https://www.reddit.com/</a>.
*
/**
* Support <a href="https://www.reddit.com/">https://www.reddit.com/</a>.
*
@@
-37,9
+34,8
@@
public class Reddit extends BasicSupport {
protected List<Entry<URL, String>> getUrls() throws IOException {
List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>();
String base = "https://www.reddit.com/r/";
protected List<Entry<URL, String>> getUrls() throws IOException {
List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>();
String base = "https://www.reddit.com/r/";
- urls.add(new AbstractMap.SimpleEntry<URL, String>(
- new URL(base + "linux_gaming" + "/new/"), "linux_gaming"
- ));
+ urls.add(new AbstractMap.SimpleEntry<URL, String>(new URL(base
+ + "linux_gaming" + "/new/"), "linux_gaming"));
return urls;
}
return urls;
}
@@
-53,7
+49,7
@@
public class Reddit extends BasicSupport {
if (list.isEmpty()) {
list = doc.getElementsByClass("scrollerItem");
}
if (list.isEmpty()) {
list = doc.getElementsByClass("scrollerItem");
}
-
+
return list;
}
return list;
}
@@
-61,57
+57,55
@@
public class Reddit extends BasicSupport {
protected String getArticleId(Document doc, Element article) {
String date = getArticleDate(doc, article);
String title = getArticleTitle(doc, article);
protected String getArticleId(Document doc, Element article) {
String date = getArticleDate(doc, article);
String title = getArticleTitle(doc, article);
-
+
String id = (date + "_" + title).replaceAll("[^a-zA-Z0-9_-]", "_");
if (id.length() > 40) {
id = id.substring(0, 40);
}
String id = (date + "_" + title).replaceAll("[^a-zA-Z0-9_-]", "_");
if (id.length() > 40) {
id = id.substring(0, 40);
}
-
+
return id;
}
@Override
protected String getArticleTitle(Document doc, Element article) {
return id;
}
@Override
protected String getArticleTitle(Document doc, Element article) {
- Elements els = article.getElementsByAttributeValue(
- "
data-event-action", "
title");
+ Elements els = article.getElementsByAttributeValue(
"data-event-action",
+ "title");
if (els == null || els.isEmpty()) {
els = article.getElementsByTag("h2");
}
if (els == null || els.isEmpty()) {
els = article.getElementsByTag("h2");
}
-
+
return els.first().text().trim();
}
return els.first().text().trim();
}
-
+
@Override
protected String getArticleAuthor(Document doc, Element article) {
@Override
protected String getArticleAuthor(Document doc, Element article) {
- return article.getElementsByAttributeValueStarting(
- "href", "/user/"
- ).text().trim();
+ return article.getElementsByAttributeValueStarting("href", "/user/")
+ .text().trim();
}
@Override
protected String getArticleDate(Document doc, Element article) {
Element el = article.getElementsByClass("live-timestamp").first();
if (el == null) {
}
@Override
protected String getArticleDate(Document doc, Element article) {
Element el = article.getElementsByClass("live-timestamp").first();
if (el == null) {
- el = article.getElementsByAttributeValue(
-
"data-click-id",
"timestamp").first();
+ el = article.getElementsByAttributeValue(
"data-click-id",
+
"timestamp").first();
}
}
-
+
String dateAgo = el.text().trim();
String dateAgo = el.text().trim();
- return new SimpleDateFormat("yyyy-MM-dd_HH-mm").format(getDate(dateAgo));
+ return new SimpleDateFormat("yyyy-MM-dd_HH-mm")
+ .format(getDate(dateAgo));
}
@Override
protected String getArticleCategory(Document doc, Element article,
String currentCategory) {
}
@Override
protected String getArticleCategory(Document doc, Element article,
String currentCategory) {
- Elements categEls = article.getElementsByAttributeValueStarting(
- "href", "/r/" + currentCategory + "/search=?q=flair_name"
- );
-
+ Elements categEls = article.getElementsByAttributeValueStarting("href",
+ "/r/" + currentCategory + "/search=?q=flair_name");
+
if (categEls.size() > 0) {
if (categEls.size() > 0) {
- return currentCategory + ", "
- + categEls.first().text().trim();
+ return currentCategory + ", " + categEls.first().text().trim();
}
}
-
+
return currentCategory;
}
return currentCategory;
}
@@
-124,27
+118,27
@@
public class Reddit extends BasicSupport {
protected String getArticleIntUrl(Document doc, Element article) {
String url = article.absUrl("data-permalink");
if (url == null || url.isEmpty()) {
protected String getArticleIntUrl(Document doc, Element article) {
String url = article.absUrl("data-permalink");
if (url == null || url.isEmpty()) {
- url = article.getElementsByAttributeValue(
- "data-click-id", "timestamp").first().absUrl("href");
+ url = article
+ .getElementsByAttributeValue("data-click-id", "timestamp")
+ .first().absUrl("href");
}
}
-
+
return url;
}
@Override
protected String getArticleExtUrl(Document doc, Element article) {
return url;
}
@Override
protected String getArticleExtUrl(Document doc, Element article) {
- Elements els = article.getElementsByAttributeValue(
-
"data-event-action",
"title");
+ Elements els = article.getElementsByAttributeValue(
"data-event-action",
+
"title");
if (els == null || els.isEmpty()) {
if (els == null || els.isEmpty()) {
- els = article.getElementsByAttributeValue(
- "data-click-id", "body");
+ els = article.getElementsByAttributeValue("data-click-id", "body");
}
}
-
+
Element url = els.first();
if (!url.attr("href").trim().startsWith("/")) {
return url.absUrl("href");
}
Element url = els.first();
if (!url.attr("href").trim().startsWith("/")) {
return url.absUrl("href");
}
-
+
return "";
}
return "";
}
@@
-154,18
+148,18
@@
public class Reddit extends BasicSupport {
if (els != null && !els.isEmpty()) {
return els.first().text().trim();
}
if (els != null && !els.isEmpty()) {
return els.first().text().trim();
}
-
+
return "";
}
@Override
protected Element getFullArticle(Document doc) {
return "";
}
@Override
protected Element getFullArticle(Document doc) {
- Element element = doc.getElementsByAttributeValue(
-
"data-click-id",
"body").first();
+ Element element = doc.getElementsByAttributeValue(
"data-click-id",
+
"body").first();
if (element == null) {
element = doc.getElementsByClass("ckueCN").first();
}
if (element == null) {
element = doc.getElementsByClass("ckueCN").first();
}
-
+
return element;
}
return element;
}
@@
-180,7
+174,10
@@
public class Reddit extends BasicSupport {
if (posts.isEmpty()) {
posts = doc.getElementsByClass("eCeBkc");
}
if (posts.isEmpty()) {
posts = doc.getElementsByClass("eCeBkc");
}
-
+ if (posts.isEmpty()) {
+ posts = doc.getElementsByClass("gxtxxZ");
+ }
+
return posts;
}
return posts;
}
@@
-190,8
+187,15
@@
public class Reddit extends BasicSupport {
List<Element> elements = new LinkedList<Element>();
for (Element el : container.children()) {
elements.addAll(el.getElementsByClass("jHfOJm"));
List<Element> elements = new LinkedList<Element>();
for (Element el : container.children()) {
elements.addAll(el.getElementsByClass("jHfOJm"));
+
+ }
+
+ if (elements.isEmpty()) {
+ for (Element el : container.children()) {
+ elements.addAll(el.getElementsByClass("Comment"));
+ }
}
}
-
+
return elements;
}
return elements;
}
@@
-199,15
+203,17
@@
public class Reddit extends BasicSupport {
protected String getCommentId(Element post) {
int level = 1;
Elements els = post.getElementsByClass("imyGpC");
protected String getCommentId(Element post) {
int level = 1;
Elements els = post.getElementsByClass("imyGpC");
- if (els.size() > 0) {
- String l = els.first().text().trim()
- .replace("level ", "");
+ if (els.isEmpty())
+ els.addAll(post.getElementsByClass("emJXdb"));
+
+ if (!els.isEmpty()) {
+ String l = els.first().text().trim().replace("level ", "");
try {
level = Integer.parseInt(l);
try {
level = Integer.parseInt(l);
- } catch(NumberFormatException e) {
+ } catch
(NumberFormatException e) {
}
}
}
}
-
+
return Integer.toString(level);
}
return Integer.toString(level);
}
@@
-220,45
+226,51
@@
public class Reddit extends BasicSupport {
@Override
protected String getCommentTitle(Element post) {
// Since we have no title, we switch with author
@Override
protected String getCommentTitle(Element post) {
// Since we have no title, we switch with author
- Elements els = post.getElementsByClass("RVnoX");
- if (els.size() > 0) {
- return els.first().text().trim();
- }
-
-
els = post.getElementsByClass("kzePTH"
);
- if (els.size() > 0) {
- return els.first().text().trim();
- }
-
+
+ Element authorEl = post.getElementsByClass("RVnoX").first();
+ if (authorEl == null)
+ authorEl = post.getElementsByClass("kzePTH").first();
+ if (authorEl == null)
+
authorEl = post.getElementsByClass("jczTlv").first(
);
+
+ if (authorEl != null)
+ return authorEl.text().trim();
+
return "";
}
@Override
protected String getCommentDate(Element post) {
return "";
}
@Override
protected String getCommentDate(Element post) {
- String dateAgo = post.getElementsByClass("hJDlLH")
- .first().text().trim();
- return new SimpleDateFormat("yyyy-MM-dd_HH-mm").format(getDate(dateAgo));
+ Element elAgo = post.getElementsByClass("hJDlLH").first();
+ if (elAgo == null)
+ elAgo = post.getElementsByClass("hDplaG").first();
+
+ if (elAgo != null) {
+ String dateAgo = elAgo.text().trim();
+ return new SimpleDateFormat("yyyy-MM-dd_HH-mm")
+ .format(getDate(dateAgo));
+ }
+
+ return "";
}
@Override
protected Element getCommentContentElement(Element post) {
}
@Override
protected Element getCommentContentElement(Element post) {
- return post.getElementsByClass("ckueCN")
- .first();
+ return post.getElementsByClass("ckueCN").first();
}
@Override
protected ElementProcessor getElementProcessorComment() {
return new BasicElementProcessor();
}
}
@Override
protected ElementProcessor getElementProcessorComment() {
return new BasicElementProcessor();
}
-
+
@Override
public void fetch(Story story) throws IOException {
super.fetch(story);
@Override
public void fetch(Story story) throws IOException {
super.fetch(story);
-
+
List<Comment> comments = new LinkedList<Comment>();
List<Comment> comments = new LinkedList<Comment>();
- Map<Integer, Comment> lastOfLevel =
- new HashMap<Integer, Comment>();
-
+ Map<Integer, Comment> lastOfLevel = new HashMap<Integer, Comment>();
+
for (Comment c : story.getComments()) {
int level = Integer.parseInt(c.getId());
lastOfLevel.put(level, c);
for (Comment c : story.getComments()) {
int level = Integer.parseInt(c.getId());
lastOfLevel.put(level, c);
@@
-266,7
+278,7
@@
public class Reddit extends BasicSupport {
comments.add(c);
} else {
Comment parent = lastOfLevel.get(level - 1);
comments.add(c);
} else {
Comment parent = lastOfLevel.get(level - 1);
- if (parent != null
)
{
+ if (parent != null
)
{
parent.add(c);
} else {
// bad data
parent.add(c);
} else {
// bad data
@@
-274,10
+286,10
@@
public class Reddit extends BasicSupport {
}
}
}
}
}
}
-
+
story.setComments(comments);
}
story.setComments(comments);
}
-
+
// 2 hours ago -> 18/10/2018 21:00
private Date getDate(String dateAgo) {
int h = 0;
// 2 hours ago -> 18/10/2018 21:00
private Date getDate(String dateAgo) {
int h = 0;
@@
-292,12
+304,12
@@
public class Reddit extends BasicSupport {
dateAgo = dateAgo.replace("days ago", "").trim();
h = Integer.parseInt(dateAgo) * 24;
}
dateAgo = dateAgo.replace("days ago", "").trim();
h = Integer.parseInt(dateAgo) * 24;
}
-
- long now = new Date().getTime();
// in ms since 1970
- now = now / (1000l * 60l * 60l);
// in hours since 1970
- long then = now - h;
// in hours since 1970
+
+ long now = new Date().getTime(); // in ms since 1970
+ now = now / (1000l * 60l * 60l); // in hours since 1970
+ long then = now - h; // in hours since 1970
then = then * (1000l * 60l * 60l); // in ms since 1970
then = then * (1000l * 60l * 60l); // in ms since 1970
-
+
return new Date(then);
}
}
return new Date(then);
}
}