From bb0d9eb242c303df073dc80d39e24b8b10c1dddb Mon Sep 17 00:00:00 2001 From: Niki Roo Date: Mon, 7 Aug 2017 20:06:47 +0200 Subject: [PATCH] Fix comments on LWN (should be ok now) + UTF8 html --- src/be/nikiroo/gofetch/output/Html.java | 28 +++++++-- src/be/nikiroo/gofetch/support/LWN.java | 82 ++++++++++++++++++------- 2 files changed, 85 insertions(+), 25 deletions(-) diff --git a/src/be/nikiroo/gofetch/output/Html.java b/src/be/nikiroo/gofetch/output/Html.java index b0ef7e2..5b022d1 100644 --- a/src/be/nikiroo/gofetch/output/Html.java +++ b/src/be/nikiroo/gofetch/output/Html.java @@ -36,17 +36,22 @@ public class Html extends Output { public String exportHeader(Story story) { StringBuilder builder = new StringBuilder(); + appendPre(builder); + builder.append("
\n"); appendHtml(builder, story, true); builder.append("
\n"); builder.append("
\n"); + appendPost(builder); + return builder.toString(); } @Override public String export(Story story) { StringBuilder builder = new StringBuilder(); + appendPre(builder); builder.append("
\n"); appendHtml(builder, story, false); @@ -60,13 +65,28 @@ public class Html extends Output { builder.append("
\n"); + appendPost(builder); + return builder.toString(); } + private void appendPre(StringBuilder builder) { + builder.append("\n"); + builder.append("\n"); + builder.append("\n"); + builder.append(" \n"); + builder.append(" \n"); + builder.append("\n"); + builder.append("\n"); + } + + private void appendPost(StringBuilder builder) { + builder.append("\n"); + } + private void appendHtml(StringBuilder builder, Comment comment, String space) { - builder.append(space).append( - "
"); + builder.append(space) + .append("
\n"); builder.append(space).append("

").append(comment.getTitle()) .append("

\n"); builder.append(space).append("
") @@ -76,7 +96,7 @@ public class Html extends Output { for (Comment subComment : comment) { appendHtml(builder, subComment, space + " "); } - builder.append(space).append("
"); + builder.append(space).append("
\n"); } private StringBuilder appendHtml(StringBuilder builder, Story story, diff --git a/src/be/nikiroo/gofetch/support/LWN.java b/src/be/nikiroo/gofetch/support/LWN.java index 08c9d5a..2fea78a 100644 --- a/src/be/nikiroo/gofetch/support/LWN.java +++ b/src/be/nikiroo/gofetch/support/LWN.java @@ -9,6 +9,7 @@ import java.util.List; import org.jsoup.helper.DataUtil; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; +import org.jsoup.nodes.Node; import org.jsoup.select.Elements; import be.nikiroo.gofetch.data.Comment; @@ -27,8 +28,6 @@ public class LWN extends BasicSupport { @Override public List list() throws IOException { - // TODO: comments + do not get comment for [$] stories - List list = new ArrayList(); URL url = new URL("https://lwn.net/"); @@ -94,48 +93,89 @@ public class LWN extends BasicSupport { @Override public void fetch(Story story) throws IOException { - /* - * URL url = new URL(story.getUrlInternal()); InputStream in = - * open(url); Document doc = DataUtil.load(in, "UTF-8", url.toString()); - * Elements listing = doc.getElementsByTag("main"); if (listing.size() > - * 0) { comments.addAll(getComments(listing.get(0))); } - */ + List comments = new ArrayList(); + String fullContent = story.getContent(); + + // Do not try the paid-for stories... + if (!story.getTitle().startsWith("[$]")) { + URL url = new URL(story.getUrlInternal()); + InputStream in = open(url); + Document doc = DataUtil.load(in, "UTF-8", url.toString()); + Elements fullContentElements = doc + .getElementsByClass("ArticleText"); + if (fullContentElements.size() > 0) { + // comments.addAll(getComments(listing.get(0))); + fullContent = fullContentElements.get(0).text(); + } + + Elements listing = doc.getElementsByClass("lwn-u-1"); + if (listing.size() > 0) { + comments.addAll(getComments(listing.get(0))); + } + } else { + fullContent = "[$] Sorry, this article is currently available to LWN suscribers only [https://lwn.net/subscribe/]."; + } + + story.setFullContent(fullContent); + story.setComments(comments); } private List getComments(Element listing) { List comments = new ArrayList(); for (Element commentElement : listing.children()) { - if (commentElement.hasClass("comment")) { + if (commentElement.hasClass("CommentBox")) { Comment comment = getComment(commentElement); if (!comment.isEmpty()) { comments.add(comment); } + } else if (commentElement.hasClass("Comment")) { + if (comments.size() > 0) { + comments.get(comments.size() - 1).addAll( + getComments(commentElement)); + } } } return comments; } private Comment getComment(Element commentElement) { - String title = firstOrEmptyTag(commentElement, "h3"); - String author = firstOrEmpty(commentElement, "h4"); - String content = firstOrEmpty(commentElement, "comment-body"); + String title = firstOrEmpty(commentElement, "CommentTitle"); + String author = firstOrEmpty(commentElement, "CommentPoster"); String date = ""; - int pos = author.lastIndexOf(" on "); + int pos = author.lastIndexOf(" by "); if (pos >= 0) { - date = author.substring(pos + " on ".length()).trim(); - author = author.substring(0, pos).trim(); + date = author.substring(0, pos).trim(); + author = author.substring(pos + " by ".length()).trim(); + + if (author.startsWith("Posted ")) { + author = author.substring("Posted ".length()).trim(); + } } - Comment comment = new Comment(commentElement.id(), author, title, date, - content); + String content = ""; + Elements commentBodyElements = commentElement + .getElementsByClass("CommentBody"); + if (commentBodyElements.size() > 0) { + for (Node contentNode : commentBodyElements.get(0).childNodes()) { + if (contentNode instanceof Element) { + Element contentElement = (Element) contentNode; + if (!contentElement.hasClass("CommentPoster")) { + content = content.trim() + " " + + contentElement.text().trim(); + } + } else { + content = content.trim() + " " + + contentNode.outerHtml().trim(); + } - Elements commentOutline = commentElement - .getElementsByClass("comment-outline"); - if (commentOutline.size() > 0) { - comment.addAll(getComments(commentOutline.get(0))); + } + content = content.trim(); } + Comment comment = new Comment(commentElement.id(), author, title, date, + content); + return comment; } -- 2.27.0