src/be/nikiroo/gofetch/support/Pipedot.java

   1 package be.nikiroo.gofetch.support;
   2
   3 import java.io.IOException;
   4 import java.net.URL;
   5 import java.util.AbstractMap;
   6 import java.util.ArrayList;
   7 import java.util.List;
   8 import java.util.Map.Entry;
   9
  10 import org.jsoup.nodes.Document;
  11 import org.jsoup.nodes.Element;
  12 import org.jsoup.nodes.Node;
  13 import org.jsoup.select.Elements;
  14
  15 /**
  16  * Support <a href='https://pipedot.org/'>https://pipedot.org/</a>.
  17  *
  18  * @author niki
  19  */
  20 public class Pipedot extends BasicSupport {
  21         @Override
  22         public String getDescription() {
  23                 return "Pipedot: News for nerds, without the corporate slant";
  24         }
  25
  26         @Override
  27         protected List<Entry<URL, String>> getUrls() throws IOException {
  28                 List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>();
  29                 urls.add(new AbstractMap.SimpleEntry<URL, String>(new URL(
  30                                 "https://pipedot.org/"), ""));
  31                 return urls;
  32         }
  33
  34         @Override
  35         protected List<Element> getArticles(Document doc) {
  36                 return doc.getElementsByClass("story");
  37         }
  38
  39         @Override
  40         protected String getArticleId(Document doc, Element article) {
  41                 // Don't try on bad articles
  42                 if (getArticleTitle(doc, article).isEmpty()) {
  43                         return "";
  44                 }
  45
  46                 for (Element idElem : article.getElementsByTag("a")) {
  47                         if (idElem.attr("href").startsWith("/pipe/")) {
  48                                 return idElem.attr("href").substring("/pipe/".length());
  49                         }
  50                 }
  51
  52                 return "";
  53         }
  54
  55         @Override
  56         protected String getArticleTitle(Document doc, Element article) {
  57                 Element title = article.getElementsByTag("h1").first();
  58                 if (title != null) {
  59                         return title.text();
  60                 }
  61
  62                 return "";
  63         }
  64
  65         @Override
  66         protected String getArticleAuthor(Document doc, Element article) {
  67                 String value = getArticleDetailsReal(article);
  68                 int pos = value.indexOf("by ");
  69                 if (pos >= 0) {
  70                         value = value.substring(pos + "by ".length()).trim();
  71                         pos = value.indexOf(" in ");
  72                         if (pos >= 0) {
  73                                 value = value.substring(0, pos).trim();
  74                         }
  75
  76                         return value;
  77                 }
  78
  79                 return "";
  80         }
  81
  82         @Override
  83         protected String getArticleDate(Document doc, Element article) {
  84                 Element dateElement = article.getElementsByTag("time").first();
  85                 if (dateElement != null) {
  86                         return dateElement.attr("datetime");
  87                 }
  88
  89                 return "";
  90         }
  91
  92         @Override
  93         protected String getArticleCategory(Document doc, Element article,
  94                         String currentCategory) {
  95                 String value = getArticleDetailsReal(article);
  96                 int pos = value.indexOf(" in ");
  97                 if (pos >= 0) {
  98                         value = value.substring(pos + " in ".length()).trim();
  99                         pos = value.indexOf(" on ");
 100                         if (pos >= 0) {
 101                                 value = value.substring(0, pos).trim();
 102                         }
 103
 104                         return value;
 105                 }
 106
 107                 return "";
 108         }
 109
 110         @Override
 111         protected String getArticleDetails(Document doc, Element article) {
 112                 return ""; // We alrady extracted all the info
 113         }
 114
 115         @Override
 116         protected String getArticleIntUrl(Document doc, Element article) {
 117                 Element link = article.getElementsByTag("a").first();
 118                 if (link != null) {
 119                         return link.absUrl("href");
 120                 }
 121
 122                 return "";
 123         }
 124
 125         @Override
 126         protected String getArticleExtUrl(Document doc, Element article) {
 127                 Element link = article.getElementsByTag("a").first();
 128                 if (link != null) {
 129                         String possibleExtLink = link.absUrl("href").trim();
 130                         if (!possibleExtLink.isEmpty()
 131                                         && !possibleExtLink.contains("pipedot.org/")) {
 132                                 return possibleExtLink;
 133                         }
 134                 }
 135
 136                 return "";
 137         }
 138
 139         @Override
 140         protected String getArticleContent(Document doc, Element article) {
 141                 for (Element elem : article.children()) {
 142                         String tag = elem.tagName();
 143                         if (!tag.equals("header") && !tag.equals("footer")) {
 144                                 return getArticleText(elem);
 145                         }
 146                 }
 147
 148                 return "";
 149         }
 150
 151         @Override
 152         protected Element getFullArticle(Document doc) {
 153                 return null;
 154         }
 155
 156         @Override
 157         protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) {
 158                 return getCommentElements(doc.getElementsByTag("main").first());
 159         }
 160
 161         @Override
 162         protected ElementProcessor getElementProcessorFullArticle() {
 163                 return new BasicElementProcessor();
 164         }
 165
 166         @Override
 167         protected List<Element> getCommentCommentPosts(Document doc,
 168                         Element container) {
 169
 170                 if (container != null) {
 171                         container = container.getElementsByClass("comment-outline").first();
 172                 }
 173
 174                 return getCommentElements(container);
 175         }
 176
 177         @Override
 178         protected String getCommentId(Element post) {
 179                 return post.id();
 180         }
 181
 182         @Override
 183         protected String getCommentAuthor(Element post) {
 184                 Element authorDateE = post.getElementsByTag("h3").first();
 185                 if (authorDateE != null) {
 186                         String authorDate = authorDateE.text();
 187                         int pos = authorDate.lastIndexOf(" on ");
 188                         if (pos >= 0) {
 189                                 return authorDate.substring(0, pos).trim();
 190                         }
 191                 }
 192
 193                 return "";
 194         }
 195
 196         @Override
 197         protected String getCommentTitle(Element post) {
 198                 Element title = post.getElementsByTag("h3").first();
 199                 if (title != null) {
 200                         return title.text();
 201                 }
 202
 203                 return "";
 204         }
 205
 206         @Override
 207         protected String getCommentDate(Element post) {
 208                 Element authorDateE = post.getElementsByTag("h3").first();
 209                 if (authorDateE != null) {
 210                         String authorDate = authorDateE.text();
 211                         int pos = authorDate.lastIndexOf(" on ");
 212                         if (pos >= 0) {
 213                                 return authorDate.substring(pos + " on ".length()).trim();
 214                         }
 215                 }
 216
 217                 return "";
 218         }
 219
 220         @Override
 221         protected Element getCommentContentElement(Element post) {
 222                 return post.getElementsByClass("comment-body").first();
 223         }
 224
 225         @Override
 226         protected ElementProcessor getElementProcessorComment() {
 227                 return new BasicElementProcessor() {
 228                         @Override
 229                         public boolean detectQuote(Node node) {
 230                                 if (node instanceof Element) {
 231                                         Element elementNode = (Element) node;
 232                                         if (elementNode.tagName().equals("blockquote")
 233                                                         || elementNode.hasClass("quote")) {
 234                                                 return true;
 235                                         }
 236                                 }
 237
 238                                 return false;
 239                         }
 240                 };
 241         }
 242
 243         private String getArticleDetailsReal(Element article) {
 244                 Elements detailsElements = article.getElementsByTag("div");
 245                 if (detailsElements.size() > 0) {
 246                         return detailsElements.get(0).text().trim();
 247                 }
 248
 249                 return "";
 250         }
 251
 252         private List<Element> getCommentElements(Element container) {
 253                 List<Element> commentElements = new ArrayList<Element>();
 254                 if (container != null) {
 255                         for (Element commentElement : container.children()) {
 256                                 if (commentElement.hasClass("comment")) {
 257                                         commentElements.add(commentElement);
 258                                 }
 259                         }
 260                 }
 261                 return commentElements;
 262         }
 263 }