src/be/nikiroo/gofetch/support/TooLinux.java

   1 package be.nikiroo.gofetch.support;
   2
   3 import java.io.IOException;
   4 import java.io.InputStream;
   5 import java.net.URL;
   6 import java.util.ArrayList;
   7 import java.util.List;
   8
   9 import org.jsoup.helper.DataUtil;
  10 import org.jsoup.nodes.Document;
  11 import org.jsoup.nodes.Element;
  12 import org.jsoup.nodes.Node;
  13 import org.jsoup.select.Elements;
  14
  15 import be.nikiroo.gofetch.data.Comment;
  16 import be.nikiroo.gofetch.data.Story;
  17 import be.nikiroo.utils.StringUtils;
  18
  19 /**
  20  * Support <a href="https://www.toolinux.com/">https://www.toolinux.com/</a>.
  21  *
  22  * @author niki
  23  */
  24 public class TooLinux extends BasicSupport {
  25         @Override
  26         public String getDescription() {
  27                 return "TooLinux: Actualité généraliste sur Linux et les logiciels libres";
  28         }
  29
  30         @Override
  31         public List<Story> list() throws IOException {
  32                 List<Story> list = new ArrayList<Story>();
  33
  34                 URL url = new URL("https://www.toolinux.com/");
  35                 InputStream in = downloader.open(url);
  36                 Document doc = DataUtil.load(in, "UTF-8", url.toString());
  37                 Elements articles = doc.getElementsByClass("hentry");
  38                 for (Element article : articles) {
  39                         String id = "";
  40                         String intUrl = "";
  41                         String extUrl = ""; // nope
  42                         String title = "";
  43                         String date = "";
  44                         String details = "";
  45                         String body = "";
  46                         String author = ""; // nope
  47                         String categ = ""; // nope
  48
  49                         Element urlElement = article.getElementsByTag("a").first();
  50                         if (urlElement != null) {
  51                                 intUrl = urlElement.absUrl("href");
  52                         }
  53
  54                         Element titleElement = article.getElementsByClass("entry-title")
  55                                         .first();
  56                         if (titleElement != null) {
  57                                 title = StringUtils.unhtml(titleElement.text()).trim();
  58                         }
  59
  60                         Element dateElement = article.getElementsByClass("published")
  61                                         .first();
  62                         if (dateElement != null) {
  63                                 date = StringUtils.unhtml(dateElement.text()).trim();
  64                                 id = dateElement.attr("title").trim();
  65                         }
  66
  67                         if (id.isEmpty()) {
  68                                 // fallback
  69                                 id = intUrl.replace("/", "_");
  70                         }
  71
  72                         Element detailsElement = article.getElementsByClass("introduction")
  73                                         .first();
  74                         if (detailsElement != null) {
  75                                 details += StringUtils.unhtml(detailsElement.text()).trim();
  76                         }
  77
  78                         list.add(new Story(getType(), id, title, author, date, categ,
  79                                         details, intUrl, extUrl, body));
  80                 }
  81
  82                 return list;
  83         }
  84
  85         @Override
  86         public void fetch(Story story) throws IOException {
  87                 String fullContent = story.getContent();
  88                 List<Comment> comments = new ArrayList<Comment>();
  89                 story.setComments(comments);
  90
  91                 URL url = new URL(story.getUrlInternal());
  92                 InputStream in = downloader.open(url);
  93                 try {
  94                         Document doc = DataUtil.load(in, "UTF-8", url.toString());
  95                         Element article = doc.getElementById("content");
  96                         if (article != null) {
  97                                 for (String line : toLines(article,
  98                                                 new BasicElementProcessor() {
  99                                                         @Override
 100                                                         public boolean ignoreNode(Node node) {
 101                                                                 if ("notes".equals(node.attr("class"))) {
 102                                                                         return true;
 103                                                                 }
 104                                                                 return false;
 105                                                         }
 106                                                 })) {
 107                                         fullContent += line + "\n";
 108                                 }
 109
 110                                 // Content is too tight with a single break per line:
 111                                 fullContent = fullContent.replace("\n", "\n\n") //
 112                                                 .replace("\n\n\n\n", "\n\n") //
 113                                                 .replace("\n\n\n\n", "\n\n") //
 114                                                 .trim();
 115                         }
 116
 117                         story.setFullContent(fullContent);
 118                 } finally {
 119                         if (in != null) {
 120                                 in.close();
 121                         }
 122                 }
 123         }
 124 }