src/be/nikiroo/gofetch/support/Pipedot.java

   1 package be.nikiroo.gofetch.support;
   2
   3 import java.io.IOException;
   4 import java.io.InputStream;
   5 import java.net.URL;
   6 import java.util.ArrayList;
   7 import java.util.List;
   8
   9 import org.jsoup.helper.DataUtil;
  10 import org.jsoup.nodes.Document;
  11 import org.jsoup.nodes.Element;
  12 import org.jsoup.nodes.Node;
  13 import org.jsoup.select.Elements;
  14
  15 import be.nikiroo.gofetch.data.Comment;
  16 import be.nikiroo.gofetch.data.Story;
  17
  18 /**
  19  * Support <a href='https://pipedot.org/'>https://pipedot.org/</a>.
  20  *
  21  * @author niki
  22  */
  23 public class Pipedot extends BasicSupport {
  24         @Override
  25         public String getDescription() {
  26                 return "Pipedot: News for nerds, without the corporate slant";
  27         }
  28
  29         @Override
  30         public List<Story> list() throws IOException {
  31                 List<Story> list = new ArrayList<Story>();
  32
  33                 URL url = new URL("https://pipedot.org/");
  34                 InputStream in = downloader.open(url);
  35                 Document doc = DataUtil.load(in, "UTF-8", url.toString());
  36                 Elements articles = doc.getElementsByClass("story");
  37                 for (Element article : articles) {
  38                         Elements titles = article.getElementsByTag("h1");
  39                         if (titles.size() == 0) {
  40                                 continue;
  41                         }
  42
  43                         Element title = titles.get(0);
  44
  45                         String id = "";
  46                         for (Element idElem : article.getElementsByTag("a")) {
  47                                 if (idElem.attr("href").startsWith("/pipe/")) {
  48                                         id = idElem.attr("href").substring("/pipe/".length());
  49                                         break;
  50                                 }
  51                         }
  52
  53                         String intUrl = null;
  54                         String extUrl = null;
  55
  56                         Elements links = article.getElementsByTag("a");
  57                         if (links.size() > 0) {
  58                                 intUrl = links.get(0).absUrl("href");
  59                         }
  60
  61                         // Take first ext URL as original source
  62                         for (Element link : links) {
  63                                 String uuu = link.absUrl("href");
  64                                 if (!uuu.isEmpty() && !uuu.contains("pipedot.org/")) {
  65                                         extUrl = uuu;
  66                                         break;
  67                                 }
  68                         }
  69
  70                         String details = "";
  71                         Elements detailsElements = article.getElementsByTag("div");
  72                         if (detailsElements.size() > 0) {
  73                                 details = detailsElements.get(0).text().trim();
  74                         }
  75
  76                         String author = "";
  77                         int pos = details.indexOf("by ");
  78                         if (pos >= 0) {
  79                                 author = details.substring(pos + "by ".length()).trim();
  80                                 pos = author.indexOf(" in ");
  81                                 if (pos >= 0) {
  82                                         author = author.substring(0, pos).trim();
  83                                 }
  84                         }
  85
  86                         String categ = "";
  87                         pos = details.indexOf(" in ");
  88                         if (pos >= 0) {
  89                                 categ = details.substring(pos + " in ".length()).trim();
  90                                 pos = categ.indexOf(" on ");
  91                                 if (pos >= 0) {
  92                                         categ = categ.substring(0, pos).trim();
  93                                 }
  94                         }
  95
  96                         String date = "";
  97                         Element dateElement = article.getElementsByTag("time").first();
  98                         if (dateElement != null) {
  99                                 date = date(dateElement.attr("datetime"));
 100                         }
 101
 102                         // We already have all the details (date, author, id, categ)
 103                         details = "";
 104
 105                         String body = "";
 106                         for (Element elem : article.children()) {
 107                                 String tag = elem.tag().toString();
 108                                 if (!tag.equals("header") && !tag.equals("footer")) {
 109                                         body = elem.text();
 110                                         break;
 111                                 }
 112                         }
 113
 114                         list.add(new Story(getType(), id, title.text(), author, date,
 115                                         categ, details, intUrl, extUrl, body));
 116                 }
 117
 118                 return list;
 119         }
 120
 121         @Override
 122         public void fetch(Story story) throws IOException {
 123                 List<Comment> comments = new ArrayList<Comment>();
 124
 125                 URL url = new URL(story.getUrlInternal());
 126                 InputStream in = downloader.open(url);
 127                 Document doc = DataUtil.load(in, "UTF-8", url.toString());
 128                 Elements listing = doc.getElementsByTag("main");
 129                 if (listing.size() > 0) {
 130                         comments.addAll(getComments(listing.get(0)));
 131                 }
 132
 133                 story.setComments(comments);
 134         }
 135
 136         private List<Comment> getComments(Element listing) {
 137                 List<Comment> comments = new ArrayList<Comment>();
 138                 for (Element commentElement : listing.children()) {
 139                         if (commentElement.hasClass("comment")) {
 140                                 Comment comment = getComment(commentElement);
 141                                 if (!comment.isEmpty()) {
 142                                         comments.add(comment);
 143                                 }
 144                         }
 145                 }
 146                 return comments;
 147         }
 148
 149         private Comment getComment(Element commentElement) {
 150                 String title = firstOrEmptyTag(commentElement, "h3").text();
 151                 String author = firstOrEmpty(commentElement, "h4").text();
 152                 Element content = firstOrEmpty(commentElement, "comment-body");
 153
 154                 String date = "";
 155                 int pos = author.lastIndexOf(" on ");
 156                 if (pos >= 0) {
 157                         date = author.substring(pos + " on ".length()).trim();
 158                         author = author.substring(0, pos).trim();
 159                 }
 160
 161                 Comment comment = new Comment(commentElement.id(), author, title, date,
 162                                 toLines(content));
 163
 164                 Elements commentOutline = commentElement
 165                                 .getElementsByClass("comment-outline");
 166                 if (commentOutline.size() > 0) {
 167                         comment.addAll(getComments(commentOutline.get(0)));
 168                 }
 169
 170                 return comment;
 171         }
 172
 173         private List<String> toLines(Element element) {
 174                 return toLines(element, new BasicElementProcessor() {
 175                         @Override
 176                         public boolean detectQuote(Node node) {
 177                                 if (node instanceof Element) {
 178                                         Element elementNode = (Element) node;
 179                                         if (elementNode.tagName().equals("blockquote")
 180                                                         || elementNode.hasClass("quote")) {
 181                                                 return true;
 182                                         }
 183                                 }
 184
 185                                 return false;
 186                         }
 187                 });
 188         }
 189 }