Version 0.2.0: supports LWN, quotes, <br>s
[gofetch.git] / src / be / nikiroo / gofetch / support / LWN.java
index 869380dd03ec4c4ab0cb14b6c08d4614141c16ab..dba4c3bfa922f684729352baa2d438e075c1ac88 100644 (file)
@@ -9,6 +9,7 @@ import java.util.List;
 import org.jsoup.helper.DataUtil;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
 import org.jsoup.select.Elements;
 
 import be.nikiroo.gofetch.data.Comment;
@@ -27,9 +28,6 @@ public class LWN extends BasicSupport {
 
        @Override
        public List<Story> list() throws IOException {
-               // TODO: comments + do not get comment for [$] stories
-               // + update body on getComment (global change, also LinuxToday)
-               
                List<Story> list = new ArrayList<Story>();
 
                URL url = new URL("https://lwn.net/");
@@ -45,29 +43,33 @@ public class LWN extends BasicSupport {
                        if (listings.size() == 0) {
                                continue;
                        }
-                       
+
                        Element listing = listings.get(0);
                        if (listing.children().size() < 2) {
                                continue;
                        }
-                       
 
                        String title = titles.get(0).text();
                        String details = listing.children().get(0).text();
-                       String body = listing.children().get(1).text();
-                       
+                       String body = "";
+                       // Join the text of every child except the first and the last two
+                       for (int i = 1; i < listing.children().size() - 2; i++) {
+                               Element e = listing.children().get(i);
+                               body = body.trim() + " " + e.text().trim();
+                       }
+                       body = body.trim();
+
                        String author = "";
                        int pos = details.indexOf(" by ");
                        if (pos >= 0) {
                                author = details.substring(pos + " by ".length()).trim();
                        }
-                       
+
                        String date = "";
                        pos = details.indexOf(" Posted ");
                        if (pos >= 0) {
                                date = details.substring(pos + " Posted ".length()).trim();
                        }
-                       
 
                        String id = "";
                        String intUrl = "";
@@ -77,108 +79,128 @@ public class LWN extends BasicSupport {
                                intUrl = idElem.absUrl("href");
                                pos = intUrl.indexOf("#Comments");
                                if (pos >= 0) {
-                                       intUrl = intUrl.substring(0, pos -1);
+                                       intUrl = intUrl.substring(0, pos - 1);
                                }
                                id = intUrl.replaceAll("[^0-9]", "");
                        }
 
-                       list.add(new Story(getType(), id, title, details, intUrl, extUrl, body));
+                       list.add(new Story(getType(), id, title, details, intUrl, extUrl,
+                                       body));
                }
 
                return list;
        }
 
        @Override
-       public List<Comment> getComments(Story story) throws IOException {
+       public void fetch(Story story) throws IOException {
                List<Comment> comments = new ArrayList<Comment>();
+               String fullContent = story.getContent();
+
+               // Do not try the paid-for stories...
+               if (!story.getTitle().startsWith("[$]")) {
+                       URL url = new URL(story.getUrlInternal());
+                       InputStream in = open(url);
+                       Document doc = DataUtil.load(in, "UTF-8", url.toString());
+                       Elements fullContentElements = doc
+                                       .getElementsByClass("ArticleText");
+                       if (fullContentElements.size() > 0) {
+                               // comments.addAll(getComments(listing.get(0)));
+                               fullContent = fullContentElements.get(0).text();
+                       }
 
-               /*
-               URL url = new URL(story.getUrlInternal());
-               InputStream in = open(url);
-               Document doc = DataUtil.load(in, "UTF-8", url.toString());
-               Elements listing = doc.getElementsByTag("main");
-               if (listing.size() > 0) {
-                       comments.addAll(getComments(listing.get(0)));
+                       Elements listing = doc.getElementsByClass("lwn-u-1");
+                       if (listing.size() > 0) {
+                               comments.addAll(getComments(listing.get(0)));
+                       }
+               } else {
+                       fullContent = "[$] Sorry, this article is currently available to LWN suscribers only [https://lwn.net/subscribe/].";
                }
-               */
 
-               return comments;
+               story.setFullContent(fullContent);
+               story.setComments(comments);
        }
 
        private List<Comment> getComments(Element listing) {
                List<Comment> comments = new ArrayList<Comment>();
                for (Element commentElement : listing.children()) {
-                       if (commentElement.hasClass("comment")) {
+                       if (commentElement.hasClass("CommentBox")) { // one comment
                                Comment comment = getComment(commentElement);
                                if (!comment.isEmpty()) {
                                        comments.add(comment);
                                }
+                       } else if (commentElement.hasClass("Comment")) { // nested group, presumably replies
+                               if (comments.size() > 0) { // attach to the latest comment; dropped if none yet
+                                       comments.get(comments.size() - 1).addAll(
+                                                       getComments(commentElement));
+                               }
                        }
                }
                return comments;
        }
 
        private Comment getComment(Element commentElement) {
-               String title = firstOrEmptyTag(commentElement, "h3");
-               String author = firstOrEmpty(commentElement, "h4");
-               String content = firstOrEmpty(commentElement, "comment-body");
+               String title = firstOrEmpty(commentElement, "CommentTitle").text();
+               String author = firstOrEmpty(commentElement, "CommentPoster").text();
 
                String date = "";
-               int pos = author.lastIndexOf(" on ");
+               int pos = author.lastIndexOf(" by ");
                if (pos >= 0) {
-                       date = author.substring(pos + " on ".length()).trim();
-                       author = author.substring(0, pos).trim();
-               }
+                       date = author.substring(0, pos).trim();
+                       author = author.substring(pos + " by ".length()).trim();
 
-               Comment comment = new Comment(commentElement.id(), author, title, date,
-                               content);
+                       if (author.startsWith("Posted ")) {
+                               author = author.substring("Posted ".length()).trim();
+                       }
+               }
 
-               Elements commentOutline = commentElement
-                               .getElementsByClass("comment-outline");
-               if (commentOutline.size() > 0) {
-                       comment.addAll(getComments(commentOutline.get(0)));
+               Element content = null;
+               Elements commentBodyElements = commentElement
+                               .getElementsByClass("CommentBody");
+               if (commentBodyElements.size() > 0) {
+                       content = commentBodyElements.get(0);
                }
 
+               Comment comment = new Comment(commentElement.id(), author, title, date,
+                               toLines(content));
+
                return comment;
        }
 
-       /**
-        * Get the first element of the given class, or an empty {@link String} if
-        * none found.
-        * 
-        * @param element
-        *            the element to look in
-        * @param className
-        *            the class to look for
-        * 
-        * @return the value or an empty {@link String}
-        */
-       private String firstOrEmpty(Element element, String className) {
-               Elements subElements = element.getElementsByClass(className);
-               if (subElements.size() > 0) {
-                       return subElements.get(0).text();
-               }
+       private List<String> toLines(Element element) {
+               return toLines(element, new QuoteProcessor() { // quote rules used for this site
+                       @Override
+                       public String processText(String text) {
+                               while (text.startsWith(">")) { // strip leading '>' quote markers
+                                       text = text.substring(1).trim();
+                               }
 
-               return "";
-       }
+                               return text;
+                       }
 
-       /**
-        * Get the first element of the given tag, or an empty {@link String} if
-        * none found.
-        * 
-        * @param element
-        *            the element to look in
-        * @param tagName
-        *            the tag to look for
-        * 
-        * @return the value or an empty {@link String}
-        */
-       private String firstOrEmptyTag(Element element, String tagName) {
-               Elements subElements = element.getElementsByTag(tagName);
-               if (subElements.size() > 0) {
-                       return subElements.get(0).text();
-               }
+                       @Override
+                       public boolean detectQuote(Node node) { // blockquote tag or "QuotedText" class
+                               if (node instanceof Element) {
+                                       Element elementNode = (Element) node;
+                                       if (elementNode.tagName().equals("blockquote")
+                                                       || elementNode.hasClass("QuotedText")) {
+                                               return true;
+                                       }
+                               }
+
+                               return false;
+                       }
 
-               return "";
+                       @Override
+                       public boolean ignoreNode(Node node) { // true for "CommentPoster" elements
+                               if (node instanceof Element) {
+                                       Element elementNode = (Element) node;
+                                       if (elementNode.hasClass("CommentPoster")) {
+                                               return true;
+                                       }
+                               }
+
+                               return false;
+                       }
+               });
        }
 }