X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Fgofetch%2Fsupport%2FLWN.java;h=c033104f92fed7e7a81a5673894c0463a8d0608e;hb=c9cffa913fe4ebc5cbe483cc5afe676e6cb54abd;hp=2fea78a864855529e79d3022e92da3476027d97a;hpb=bb0d9eb242c303df073dc80d39e24b8b10c1dddb;p=gofetch.git diff --git a/src/be/nikiroo/gofetch/support/LWN.java b/src/be/nikiroo/gofetch/support/LWN.java index 2fea78a..c033104 100644 --- a/src/be/nikiroo/gofetch/support/LWN.java +++ b/src/be/nikiroo/gofetch/support/LWN.java @@ -31,12 +31,12 @@ public class LWN extends BasicSupport { List list = new ArrayList(); URL url = new URL("https://lwn.net/"); - InputStream in = open(url); + InputStream in = downloader.open(url); Document doc = DataUtil.load(in, "UTF-8", url.toString()); - Elements stories = doc.getElementsByClass("pure-u-1"); - for (Element story : stories) { - Elements titles = story.getElementsByClass("Headline"); - Elements listings = story.getElementsByClass("BlurbListing"); + Elements articles = doc.getElementsByClass("pure-u-1"); + for (Element article : articles) { + Elements titles = article.getElementsByClass("Headline"); + Elements listings = article.getElementsByClass("BlurbListing"); if (titles.size() == 0) { continue; } @@ -59,8 +59,16 @@ public class LWN extends BasicSupport { } body = body.trim(); + int pos; + + String categ = ""; + pos = details.indexOf("]"); + if (pos >= 0) { + categ = details.substring(1, pos).trim(); + } + String author = ""; - int pos = details.indexOf(" by "); + pos = details.indexOf(" by "); if (pos >= 0) { author = details.substring(pos + " by ".length()).trim(); } @@ -69,12 +77,19 @@ public class LWN extends BasicSupport { pos = details.indexOf(" Posted "); if (pos >= 0) { date = details.substring(pos + " Posted ".length()).trim(); + pos = date.indexOf(" by "); + if (pos >= 0) { + date = date.substring(0, pos).trim(); + } } + // We extracted everything from details so... + details = ""; + String id = ""; String intUrl = ""; String extUrl = ""; - for (Element idElem : story.getElementsByTag("a")) { + for (Element idElem : article.getElementsByTag("a")) { // Last link is the story link intUrl = idElem.absUrl("href"); pos = intUrl.indexOf("#Comments"); @@ -84,8 +99,8 @@ public class LWN extends BasicSupport { id = intUrl.replaceAll("[^0-9]", ""); } - list.add(new Story(getType(), id, title, details, intUrl, extUrl, - body)); + list.add(new Story(getType(), id, title, author, date, categ, + details, intUrl, extUrl, body)); } return list; @@ -99,7 +114,7 @@ public class LWN extends BasicSupport { // Do not try the paid-for stories... if (!story.getTitle().startsWith("[$]")) { URL url = new URL(story.getUrlInternal()); - InputStream in = open(url); + InputStream in = downloader.open(url); Document doc = DataUtil.load(in, "UTF-8", url.toString()); Elements fullContentElements = doc .getElementsByClass("ArticleText"); @@ -139,8 +154,8 @@ public class LWN extends BasicSupport { } private Comment getComment(Element commentElement) { - String title = firstOrEmpty(commentElement, "CommentTitle"); - String author = firstOrEmpty(commentElement, "CommentPoster"); + String title = firstOrEmpty(commentElement, "CommentTitle").text(); + String author = firstOrEmpty(commentElement, "CommentPoster").text(); String date = ""; int pos = author.lastIndexOf(" by "); @@ -153,69 +168,54 @@ public class LWN extends BasicSupport { } } - String content = ""; + Element content = null; Elements commentBodyElements = commentElement .getElementsByClass("CommentBody"); if (commentBodyElements.size() > 0) { - for (Node contentNode : commentBodyElements.get(0).childNodes()) { - if (contentNode instanceof Element) { - Element contentElement = (Element) contentNode; - if (!contentElement.hasClass("CommentPoster")) { - content = content.trim() + " " - + contentElement.text().trim(); - } - } else { - content = content.trim() + " " - + contentNode.outerHtml().trim(); - } - - } - content = content.trim(); + content = commentBodyElements.get(0); } Comment comment = new Comment(commentElement.id(), author, title, date, - content); + toLines(content)); return comment; } - /** - * Get the first element of the given class, or an empty {@link String} if - * none found. - * - * @param element - * the element to look in - * @param className - * the class to look for - * - * @return the value or an empty {@link String} - */ - private String firstOrEmpty(Element element, String className) { - Elements subElements = element.getElementsByClass(className); - if (subElements.size() > 0) { - return subElements.get(0).text(); - } + private List toLines(Element element) { + return toLines(element, new BasicElementProcessor() { + @Override + public String processText(String text) { + while (text.startsWith(">")) { // comments + text = text.substring(1).trim(); + } - return ""; - } + return text; + } - /** - * Get the first element of the given tag, or an empty {@link String} if - * none found. - * - * @param element - * the element to look in - * @param tagName - * the tag to look for - * - * @return the value or an empty {@link String} - */ - private String firstOrEmptyTag(Element element, String tagName) { - Elements subElements = element.getElementsByTag(tagName); - if (subElements.size() > 0) { - return subElements.get(0).text(); - } + @Override + public boolean detectQuote(Node node) { + if (node instanceof Element) { + Element elementNode = (Element) node; + if (elementNode.tagName().equals("blockquote") + || elementNode.hasClass("QuotedText")) { + return true; + } + } - return ""; + return false; + } + + @Override + public boolean ignoreNode(Node node) { + if (node instanceof Element) { + Element elementNode = (Element) node; + if (elementNode.hasClass("CommentPoster")) { + return true; + } + } + + return false; + } + }); } }