Fix comments on LWN (should be ok now) + UTF8 html
author Niki Roo <niki@nikiroo.be>
Mon, 7 Aug 2017 18:06:47 +0000 (20:06 +0200)
committer Niki Roo <niki@nikiroo.be>
Mon, 7 Aug 2017 18:06:47 +0000 (20:06 +0200)
src/be/nikiroo/gofetch/output/Html.java
src/be/nikiroo/gofetch/support/LWN.java

index b0ef7e2e834698b66791fdbf6784ece99d935200..5b022d1f8789224a1ea11514bc55ac42da01b47f 100644 (file)
@@ -36,17 +36,22 @@ public class Html extends Output {
        public String exportHeader(Story story) {
                StringBuilder builder = new StringBuilder();
 
+               appendPre(builder);
+
                builder.append("<div class='story-header'>\n");
                appendHtml(builder, story, true);
                builder.append("<hr/>\n");
                builder.append("</div>\n");
 
+               appendPost(builder);
+
                return builder.toString();
        }
 
        @Override
        public String export(Story story) {
                StringBuilder builder = new StringBuilder();
+               appendPre(builder);
 
                builder.append("<div class='story'>\n");
                appendHtml(builder, story, false);
@@ -60,13 +65,28 @@ public class Html extends Output {
 
                builder.append("</div>\n");
 
+               appendPost(builder);
+
                return builder.toString();
        }
 
+       private void appendPre(StringBuilder builder) {
+               builder.append("<!DOCTYPE html>\n");
+               builder.append("<html>\n");
+               builder.append("<head>\n");
+               builder.append("  <meta http-equiv='content-type' content='text/html; charset=utf-8'>\n");
+               builder.append("  <meta name='viewport' content='width=device-width, initial-scale=1.0'>\n");
+               builder.append("</head>\n");
+               builder.append("<body>\n");
+       }
+
+       private void appendPost(StringBuilder builder) {
+               builder.append("</body>\n");
+       }
+
        private void appendHtml(StringBuilder builder, Comment comment, String space) {
-               builder.append(space).append(
-                               "<div class='comment' style='display: block; margin-left: "
-                                               + (20 * space.length()) + "px'>");
+               builder.append(space)
+                               .append("<div class='comment' style='display: block; margin-left: 80px'>\n");
                builder.append(space).append("  <h2>").append(comment.getTitle())
                                .append("</h2>\n");
                builder.append(space).append("  <div class='by'>")
@@ -76,7 +96,7 @@ public class Html extends Output {
                for (Comment subComment : comment) {
                        appendHtml(builder, subComment, space + "  ");
                }
-               builder.append(space).append("</div>");
+               builder.append(space).append("</div>\n");
        }
 
        private StringBuilder appendHtml(StringBuilder builder, Story story,
index 08c9d5a91cdc52bc3bbf61e336d6522c8a9f1451..2fea78a864855529e79d3022e92da3476027d97a 100644 (file)
@@ -9,6 +9,7 @@ import java.util.List;
 import org.jsoup.helper.DataUtil;
 import org.jsoup.nodes.Document;
 import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
 import org.jsoup.select.Elements;
 
 import be.nikiroo.gofetch.data.Comment;
@@ -27,8 +28,6 @@ public class LWN extends BasicSupport {
 
        @Override
        public List<Story> list() throws IOException {
-               // TODO: comments + do not get comment for [$] stories
-
                List<Story> list = new ArrayList<Story>();
 
                URL url = new URL("https://lwn.net/");
@@ -94,48 +93,89 @@ public class LWN extends BasicSupport {
 
        @Override
        public void fetch(Story story) throws IOException {
-               /*
-                * URL url = new URL(story.getUrlInternal()); InputStream in =
-                * open(url); Document doc = DataUtil.load(in, "UTF-8", url.toString());
-                * Elements listing = doc.getElementsByTag("main"); if (listing.size() >
-                * 0) { comments.addAll(getComments(listing.get(0))); }
-                */
+               List<Comment> comments = new ArrayList<Comment>();
+               String fullContent = story.getContent();
+
+               // Do not try the paid-for stories...
+               if (!story.getTitle().startsWith("[$]")) {
+                       URL url = new URL(story.getUrlInternal());
+                       InputStream in = open(url);
+                       Document doc = DataUtil.load(in, "UTF-8", url.toString());
+                       Elements fullContentElements = doc
+                                       .getElementsByClass("ArticleText");
+                       if (fullContentElements.size() > 0) {
+                               // comments.addAll(getComments(listing.get(0)));
+                               fullContent = fullContentElements.get(0).text();
+                       }
+
+                       Elements listing = doc.getElementsByClass("lwn-u-1");
+                       if (listing.size() > 0) {
+                               comments.addAll(getComments(listing.get(0)));
+                       }
+               } else {
+                       fullContent = "[$] Sorry, this article is currently available to LWN suscribers only [https://lwn.net/subscribe/].";
+               }
+
+               story.setFullContent(fullContent);
+               story.setComments(comments);
        }
 
        private List<Comment> getComments(Element listing) {
                List<Comment> comments = new ArrayList<Comment>();
                for (Element commentElement : listing.children()) {
-                       if (commentElement.hasClass("comment")) {
+                       if (commentElement.hasClass("CommentBox")) {
                                Comment comment = getComment(commentElement);
                                if (!comment.isEmpty()) {
                                        comments.add(comment);
                                }
+                       } else if (commentElement.hasClass("Comment")) {
+                               if (comments.size() > 0) {
+                                       comments.get(comments.size() - 1).addAll(
+                                                       getComments(commentElement));
+                               }
                        }
                }
                return comments;
        }
 
        private Comment getComment(Element commentElement) {
-               String title = firstOrEmptyTag(commentElement, "h3");
-               String author = firstOrEmpty(commentElement, "h4");
-               String content = firstOrEmpty(commentElement, "comment-body");
+               String title = firstOrEmpty(commentElement, "CommentTitle");
+               String author = firstOrEmpty(commentElement, "CommentPoster");
 
                String date = "";
-               int pos = author.lastIndexOf(" on ");
+               int pos = author.lastIndexOf(" by ");
                if (pos >= 0) {
-                       date = author.substring(pos + " on ".length()).trim();
-                       author = author.substring(0, pos).trim();
+                       date = author.substring(0, pos).trim();
+                       author = author.substring(pos + " by ".length()).trim();
+
+                       if (author.startsWith("Posted ")) {
+                               author = author.substring("Posted ".length()).trim();
+                       }
                }
 
-               Comment comment = new Comment(commentElement.id(), author, title, date,
-                               content);
+               String content = "";
+               Elements commentBodyElements = commentElement
+                               .getElementsByClass("CommentBody");
+               if (commentBodyElements.size() > 0) {
+                       for (Node contentNode : commentBodyElements.get(0).childNodes()) {
+                               if (contentNode instanceof Element) {
+                                       Element contentElement = (Element) contentNode;
+                                       if (!contentElement.hasClass("CommentPoster")) {
+                                               content = content.trim() + " "
+                                                               + contentElement.text().trim();
+                                       }
+                               } else {
+                                       content = content.trim() + " "
+                                                       + contentNode.outerHtml().trim();
+                               }
 
-               Elements commentOutline = commentElement
-                               .getElementsByClass("comment-outline");
-               if (commentOutline.size() > 0) {
-                       comment.addAll(getComments(commentOutline.get(0)));
+                       }
+                       content = content.trim();
                }
 
+               Comment comment = new Comment(commentElement.id(), author, title, date,
+                               content);
+
                return comment;
        }