X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Fgofetch%2Fsupport%2FLWN.java;h=dba4c3bfa922f684729352baa2d438e075c1ac88;hb=27008a8782c0ed96e07c8dc39ff0ed1f5163a9d0;hp=e25bc92d519ed0d340b2d99695bfc167164ff5bf;hpb=2527107526aea628df09c3ad53432e9a5480fcd7;p=gofetch.git diff --git a/src/be/nikiroo/gofetch/support/LWN.java b/src/be/nikiroo/gofetch/support/LWN.java index e25bc92..dba4c3b 100644 --- a/src/be/nikiroo/gofetch/support/LWN.java +++ b/src/be/nikiroo/gofetch/support/LWN.java @@ -9,6 +9,7 @@ import java.util.List; import org.jsoup.helper.DataUtil; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; +import org.jsoup.nodes.Node; import org.jsoup.select.Elements; import be.nikiroo.gofetch.data.Comment; @@ -27,9 +28,6 @@ public class LWN extends BasicSupport { @Override public List list() throws IOException { - // TODO: comments + do not get comment for [$] stories - // + update body on getComment (global change, also LinuxToday) - List list = new ArrayList(); URL url = new URL("https://lwn.net/"); @@ -45,35 +43,33 @@ public class LWN extends BasicSupport { if (listings.size() == 0) { continue; } - + Element listing = listings.get(0); if (listing.children().size() < 2) { continue; } - String title = titles.get(0).text(); String details = listing.children().get(0).text(); String body = ""; // All but the first and two last children - for (int i = 1 ; i < listing.children().size() - 2; i++) { + for (int i = 1; i < listing.children().size() - 2; i++) { Element e = listing.children().get(i); body = body.trim() + " " + e.text().trim(); } body = body.trim(); - + String author = ""; int pos = details.indexOf(" by "); if (pos >= 0) { author = details.substring(pos + " by ".length()).trim(); } - + String date = ""; pos = details.indexOf(" Posted "); if (pos >= 0) { date = details.substring(pos + " Posted ".length()).trim(); } - String id = ""; String intUrl = ""; @@ -83,108 +79,128 @@ public class LWN extends BasicSupport { intUrl = idElem.absUrl("href"); pos = intUrl.indexOf("#Comments"); if (pos >= 0) { - intUrl = intUrl.substring(0, pos -1); + intUrl = intUrl.substring(0, pos - 1); } id = intUrl.replaceAll("[^0-9]", ""); } - list.add(new Story(getType(), id, title, details, intUrl, extUrl, body)); + list.add(new Story(getType(), id, title, details, intUrl, extUrl, + body)); } return list; } @Override - public List getComments(Story story) throws IOException { + public void fetch(Story story) throws IOException { List comments = new ArrayList(); + String fullContent = story.getContent(); + + // Do not try the paid-for stories... + if (!story.getTitle().startsWith("[$]")) { + URL url = new URL(story.getUrlInternal()); + InputStream in = open(url); + Document doc = DataUtil.load(in, "UTF-8", url.toString()); + Elements fullContentElements = doc + .getElementsByClass("ArticleText"); + if (fullContentElements.size() > 0) { + // comments.addAll(getComments(listing.get(0))); + fullContent = fullContentElements.get(0).text(); + } - /* - URL url = new URL(story.getUrlInternal()); - InputStream in = open(url); - Document doc = DataUtil.load(in, "UTF-8", url.toString()); - Elements listing = doc.getElementsByTag("main"); - if (listing.size() > 0) { - comments.addAll(getComments(listing.get(0))); + Elements listing = doc.getElementsByClass("lwn-u-1"); + if (listing.size() > 0) { + comments.addAll(getComments(listing.get(0))); + } + } else { + fullContent = "[$] Sorry, this article is currently available to LWN suscribers only [https://lwn.net/subscribe/]."; } - */ - return comments; + story.setFullContent(fullContent); + story.setComments(comments); } private List getComments(Element listing) { List comments = new ArrayList(); for (Element commentElement : listing.children()) { - if (commentElement.hasClass("comment")) { + if (commentElement.hasClass("CommentBox")) { Comment comment = getComment(commentElement); if (!comment.isEmpty()) { comments.add(comment); } + } else if (commentElement.hasClass("Comment")) { + if (comments.size() > 0) { + comments.get(comments.size() - 1).addAll( + getComments(commentElement)); + } } } return comments; } private Comment getComment(Element commentElement) { - String title = firstOrEmptyTag(commentElement, "h3"); - String author = firstOrEmpty(commentElement, "h4"); - String content = firstOrEmpty(commentElement, "comment-body"); + String title = firstOrEmpty(commentElement, "CommentTitle").text(); + String author = firstOrEmpty(commentElement, "CommentPoster").text(); String date = ""; - int pos = author.lastIndexOf(" on "); + int pos = author.lastIndexOf(" by "); if (pos >= 0) { - date = author.substring(pos + " on ".length()).trim(); - author = author.substring(0, pos).trim(); - } + date = author.substring(0, pos).trim(); + author = author.substring(pos + " by ".length()).trim(); - Comment comment = new Comment(commentElement.id(), author, title, date, - content); + if (author.startsWith("Posted ")) { + author = author.substring("Posted ".length()).trim(); + } + } - Elements commentOutline = commentElement - .getElementsByClass("comment-outline"); - if (commentOutline.size() > 0) { - comment.addAll(getComments(commentOutline.get(0))); + Element content = null; + Elements commentBodyElements = commentElement + .getElementsByClass("CommentBody"); + if (commentBodyElements.size() > 0) { + content = commentBodyElements.get(0); } + Comment comment = new Comment(commentElement.id(), author, title, date, + toLines(content)); + return comment; } - /** - * Get the first element of the given class, or an empty {@link String} if - * none found. - * - * @param element - * the element to look in - * @param className - * the class to look for - * - * @return the value or an empty {@link String} - */ - private String firstOrEmpty(Element element, String className) { - Elements subElements = element.getElementsByClass(className); - if (subElements.size() > 0) { - return subElements.get(0).text(); - } + private List toLines(Element element) { + return toLines(element, new QuoteProcessor() { + @Override + public String processText(String text) { + while (text.startsWith(">")) { // comments + text = text.substring(1).trim(); + } - return ""; - } + return text; + } - /** - * Get the first element of the given tag, or an empty {@link String} if - * none found. - * - * @param element - * the element to look in - * @param tagName - * the tag to look for - * - * @return the value or an empty {@link String} - */ - private String firstOrEmptyTag(Element element, String tagName) { - Elements subElements = element.getElementsByTag(tagName); - if (subElements.size() > 0) { - return subElements.get(0).text(); - } + @Override + public boolean detectQuote(Node node) { + if (node instanceof Element) { + Element elementNode = (Element) node; + if (elementNode.tagName().equals("blockquote") + || elementNode.hasClass("QuotedText")) { + return true; + } + } + + return false; + } - return ""; + @Override + public boolean ignoreNode(Node node) { + if (node instanceof Element) { + Element elementNode = (Element) node; + if (elementNode.hasClass("CommentPoster")) { + return true; + } + } + + return false; + } + }); } }