X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Fgofetch%2Fsupport%2FTheRegister.java;h=7fb152400f11a641193e47ddd78d2287baef81fb;hb=c9cffa913fe4ebc5cbe483cc5afe676e6cb54abd;hp=35c619c4a2bc19a466676df56b7c5b432b2e197c;hpb=d28c4aac3f42d9de93e3969e86a7c84e2d2e963a;p=gofetch.git diff --git a/src/be/nikiroo/gofetch/support/TheRegister.java b/src/be/nikiroo/gofetch/support/TheRegister.java index 35c619c..7fb1524 100644 --- a/src/be/nikiroo/gofetch/support/TheRegister.java +++ b/src/be/nikiroo/gofetch/support/TheRegister.java @@ -3,9 +3,7 @@ package be.nikiroo.gofetch.support; import java.io.IOException; import java.io.InputStream; import java.net.URL; -import java.text.SimpleDateFormat; import java.util.ArrayList; -import java.util.Date; import java.util.List; import org.jsoup.helper.DataUtil; @@ -18,6 +16,12 @@ import be.nikiroo.gofetch.data.Comment; import be.nikiroo.gofetch.data.Story; import be.nikiroo.utils.StringUtils; +/** + * Support https://www.theregister.co.uk/. + * + * @author niki + */ public class TheRegister extends BasicSupport { @Override public String getDescription() { @@ -46,17 +50,18 @@ public class TheRegister extends BasicSupport { String date = ""; String details = ""; String body = ""; + String categ = ""; + String author = ""; // nope - String topic = ""; - Element topicElement = article.previousElementSibling(); - if (topicElement != null) { - topic = "[" + topicElement.text().trim() + "] "; + Element categElement = article.previousElementSibling(); + if (categElement != null) { + categ = categElement.text().trim(); } + Element titleElement = article.getElementsByTag("h4").first(); if (titleElement != null) { title = StringUtils.unhtml(titleElement.text()).trim(); } - title = topic + title; Element dateElement = article.getElementsByClass("time_stamp") .first(); @@ -80,8 +85,11 @@ public class TheRegister extends BasicSupport { details += StringUtils.unhtml(detailsElement.text()).trim(); } - list.add(new Story(getType(), id, title, details, intUrl, extUrl, - body)); + // We have some "details" but no content, so we switch them: + body = details; + details = ""; + list.add(new Story(getType(), id, title, author, date, categ, + details, intUrl, extUrl, body)); } return list; @@ -91,6 +99,7 @@ public class TheRegister extends BasicSupport { public void fetch(Story story) throws IOException { String fullContent = story.getContent(); List comments = new ArrayList(); + story.setComments(comments); URL url = new URL(story.getUrlInternal()); InputStream in = downloader.open(url); @@ -112,6 +121,8 @@ public class TheRegister extends BasicSupport { .trim(); } + story.setFullContent(fullContent); + // Get comments URL then parse it in.close(); in = null; @@ -131,6 +142,10 @@ public class TheRegister extends BasicSupport { Element idE = post.getElementsByTag("a").first(); if (idE != null) { id = idE.attr("id"); + if (id.startsWith("c_")) { + id = id.substring(2); + } + Element dateE = idE.getElementsByTag("span").first(); if (dateE != null) { date = date(dateE.attr("data-epoch")); @@ -154,13 +169,16 @@ public class TheRegister extends BasicSupport { @Override public boolean ignoreNode(Node node) { // TODO: ignore headlines/pub + + // Remove the comment title (which has + // already been processed earlier) if (node instanceof Element) { - Element el = (Element)node; + Element el = (Element) node; if ("h4".equals(el.tagName())) { return true; } } - + return false; } })) { @@ -168,33 +186,32 @@ public class TheRegister extends BasicSupport { } } - comments.add(new Comment(id, author, title, date, content)); + Comment comment = new Comment(id, author, title, date, + content); + Comment parent = null; + + Element inReplyTo = post.getElementsByClass("in-reply-to") + .first(); + if (inReplyTo != null) { + String parentId = inReplyTo.absUrl("href"); + if (parentId != null && parentId.contains("/")) { + int i = parentId.lastIndexOf('/'); + parentId = parentId.substring(i + 1); + parent = story.getCommentById(parentId); + } + } + + if (parent == null) { + comments.add(comment); + } else { + parent.add(comment); + } } } - - story.setFullContent(fullContent); - story.setComments(comments); } finally { if (in != null) { in.close(); } } } - - // Return display date from epoch String, or "" if error - private static String date(String epochString) { - long epoch = 0; - try { - epoch = Long.parseLong(epochString); - } catch (Exception e) { - epoch = 0; - } - - if (epoch > 0) { - return new SimpleDateFormat("dd MMM YYYY").format(new Date( - 1000 * epoch)); - } - - return ""; - } }