src/be/nikiroo/gofetch/support/TooLinux.java

   1 package be.nikiroo.gofetch.support;
   2
   3 import java.io.IOException;
   4 import java.io.InputStream;
   5 import java.net.URL;
   6 import java.util.ArrayList;
   7 import java.util.List;
   8
   9 import org.jsoup.helper.DataUtil;
  10 import org.jsoup.nodes.Document;
  11 import org.jsoup.nodes.Element;
  12 import org.jsoup.nodes.Node;
  13 import org.jsoup.select.Elements;
  14
  15 import be.nikiroo.gofetch.data.Comment;
  16 import be.nikiroo.gofetch.data.Story;
  17 import be.nikiroo.utils.StringUtils;
  18
  19 public class TooLinux extends BasicSupport {
  20         @Override
  21         public String getDescription() {
  22                 return "Premier quotidien francophone d'actualité généraliste sur Linux, les logiciels libres et l'interopérabilité, depuis mars 2000.";
  23         }
  24
  25         @Override
  26         public List<Story> list() throws IOException {
  27                 List<Story> list = new ArrayList<Story>();
  28
  29                 URL url = new URL("https://www.toolinux.com/");
  30                 InputStream in = downloader.open(url);
  31                 Document doc = DataUtil.load(in, "UTF-8", url.toString());
  32                 Elements articles = doc.getElementsByClass("hentry");
  33                 for (Element article : articles) {
  34                         String id = "";
  35                         String intUrl = "";
  36                         String extUrl = ""; // nope
  37                         String title = "";
  38                         String date = "";
  39                         String details = "";
  40                         String body = "";
  41
  42                         Element urlElement = article.getElementsByTag("a").first();
  43                         if (urlElement != null) {
  44                                 intUrl = urlElement.absUrl("href");
  45                         }
  46
  47                         Element titleElement = article.getElementsByClass("entry-title")
  48                                         .first();
  49                         if (titleElement != null) {
  50                                 title = StringUtils.unhtml(titleElement.text()).trim();
  51                         }
  52
  53                         Element dateElement = article.getElementsByClass("published")
  54                                         .first();
  55                         if (dateElement != null) {
  56                                 date = StringUtils.unhtml(dateElement.text()).trim();
  57                                 id = dateElement.attr("title").trim();
  58                         }
  59
  60                         if (id.isEmpty()) {
  61                                 // fallback
  62                                 id = intUrl.replace("/", "_");
  63                         }
  64
  65                         Element detailsElement = article.getElementsByClass("introduction")
  66                                         .first();
  67                         details = "(" + date + ") ";
  68                         if (detailsElement != null) {
  69                                 details += StringUtils.unhtml(detailsElement.text()).trim();
  70                         }
  71
  72                         list.add(new Story(getType(), id, title, details, intUrl, extUrl,
  73                                         body));
  74                 }
  75
  76                 return list;
  77         }
  78
  79         @Override
  80         public void fetch(Story story) throws IOException {
  81                 String fullContent = story.getContent();
  82                 List<Comment> comments = new ArrayList<Comment>();
  83                 story.setComments(comments);
  84
  85                 URL url = new URL(story.getUrlInternal());
  86                 InputStream in = downloader.open(url);
  87                 try {
  88                         Document doc = DataUtil.load(in, "UTF-8", url.toString());
  89                         Element article = doc.getElementById("content");
  90                         if (article != null) {
  91                                 for (String line : toLines(article,
  92                                                 new BasicElementProcessor() {
  93                                                         @Override
  94                                                         public boolean ignoreNode(Node node) {
  95                                                                 if ("notes".equals(node.attr("class"))) {
  96                                                                         return true;
  97                                                                 }
  98                                                                 return false;
  99                                                         }
 100                                                 })) {
 101                                         fullContent += line + "\n";
 102                                 }
 103
 104                                 // Content is too tight with a single break per line:
 105                                 fullContent = fullContent.replace("\n", "\n\n") //
 106                                                 .replace("\n\n\n\n", "\n\n") //
 107                                                 .replace("\n\n\n\n", "\n\n") //
 108                                                 .trim();
 109                         }
 110
 111                         story.setFullContent(fullContent);
 112                 } finally {
 113                         if (in != null) {
 114                                 in.close();
 115                         }
 116                 }
 117         }
 118 }