Small fixes in different places
[gofetch.git] / src / be / nikiroo / gofetch / support / TheRegister.java
index 35c619c4a2bc19a466676df56b7c5b432b2e197c..7fb152400f11a641193e47ddd78d2287baef81fb 100644 (file)
@@ -3,9 +3,7 @@ package be.nikiroo.gofetch.support;
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.URL;
-import java.text.SimpleDateFormat;
 import java.util.ArrayList;
-import java.util.Date;
 import java.util.List;
 
 import org.jsoup.helper.DataUtil;
@@ -18,6 +16,12 @@ import be.nikiroo.gofetch.data.Comment;
 import be.nikiroo.gofetch.data.Story;
 import be.nikiroo.utils.StringUtils;
 
+/**
+ * Support <a
+ * href="https://www.theregister.co.uk/">https://www.theregister.co.uk/</a>.
+ * 
+ * @author niki
+ */
 public class TheRegister extends BasicSupport {
        @Override
        public String getDescription() {
@@ -46,17 +50,18 @@ public class TheRegister extends BasicSupport {
                        String date = "";
                        String details = "";
                        String body = "";
+                       String categ = "";
+                       String author = ""; // nope
 
-                       String topic = "";
-                       Element topicElement = article.previousElementSibling();
-                       if (topicElement != null) {
-                               topic = "[" + topicElement.text().trim() + "] ";
+                       Element categElement = article.previousElementSibling();
+                       if (categElement != null) {
+                               categ = categElement.text().trim();
                        }
+
                        Element titleElement = article.getElementsByTag("h4").first();
                        if (titleElement != null) {
                                title = StringUtils.unhtml(titleElement.text()).trim();
                        }
-                       title = topic + title;
 
                        Element dateElement = article.getElementsByClass("time_stamp")
                                        .first();
@@ -80,8 +85,11 @@ public class TheRegister extends BasicSupport {
                                details += StringUtils.unhtml(detailsElement.text()).trim();
                        }
 
-                       list.add(new Story(getType(), id, title, details, intUrl, extUrl,
-                                       body));
+                       // We have some "details" but no content, so we switch them:
+                       body = details;
+                       details = "";
+                       list.add(new Story(getType(), id, title, author, date, categ,
+                                       details, intUrl, extUrl, body));
                }
 
                return list;
@@ -91,6 +99,7 @@ public class TheRegister extends BasicSupport {
        public void fetch(Story story) throws IOException {
                String fullContent = story.getContent();
                List<Comment> comments = new ArrayList<Comment>();
+               story.setComments(comments);
 
                URL url = new URL(story.getUrlInternal());
                InputStream in = downloader.open(url);
@@ -112,6 +121,8 @@ public class TheRegister extends BasicSupport {
                                                .trim();
                        }
 
+                       story.setFullContent(fullContent);
+
                        // Get comments URL then parse it
                        in.close();
                        in = null;
@@ -131,6 +142,10 @@ public class TheRegister extends BasicSupport {
                                        Element idE = post.getElementsByTag("a").first();
                                        if (idE != null) {
                                                id = idE.attr("id");
+                                               if (id.startsWith("c_")) {
+                                                       id = id.substring(2);
+                                               }
+
                                                Element dateE = idE.getElementsByTag("span").first();
                                                if (dateE != null) {
                                                        date = date(dateE.attr("data-epoch"));
@@ -154,13 +169,16 @@ public class TheRegister extends BasicSupport {
                                                                        @Override
                                                                        public boolean ignoreNode(Node node) {
                                                                                // TODO: ignore headlines/pub
+
+                                                                               // Remove the comment title (which has
+                                                                               // already been processed earlier)
                                                                                if (node instanceof Element) {
-                                                                                       Element el = (Element)node;
+                                                                                       Element el = (Element) node;
                                                                                        if ("h4".equals(el.tagName())) {
                                                                                                return true;
                                                                                        }
                                                                                }
-                                                                               
+
                                                                                return false;
                                                                        }
                                                                })) {
@@ -168,33 +186,32 @@ public class TheRegister extends BasicSupport {
                                                }
                                        }
 
-                                       comments.add(new Comment(id, author, title, date, content));
+                                       Comment comment = new Comment(id, author, title, date,
+                                                       content);
+                                       Comment parent = null;
+
+                                       Element inReplyTo = post.getElementsByClass("in-reply-to")
+                                                       .first();
+                                       if (inReplyTo != null) {
+                                               String parentId = inReplyTo.absUrl("href");
+                                               if (parentId != null && parentId.contains("/")) {
+                                                       int i = parentId.lastIndexOf('/');
+                                                       parentId = parentId.substring(i + 1);
+                                                       parent = story.getCommentById(parentId);
+                                               }
+                                       }
+
+                                       if (parent == null) {
+                                               comments.add(comment);
+                                       } else {
+                                               parent.add(comment);
+                                       }
                                }
                        }
-
-                       story.setFullContent(fullContent);
-                       story.setComments(comments);
                } finally {
                        if (in != null) {
                                in.close();
                        }
                }
        }
-
-       // Return display date from epoch String, or "" if error
-       private static String date(String epochString) {
-               long epoch = 0;
-               try {
-                       epoch = Long.parseLong(epochString);
-               } catch (Exception e) {
-                       epoch = 0;
-               }
-
-               if (epoch > 0) {
-                       return new SimpleDateFormat("dd MMM YYYY").format(new Date(
-                                       1000 * epoch));
-               }
-
-               return "";
-       }
 }