Separate story details components
authorNiki Roo <niki@nikiroo.be>
Fri, 23 Mar 2018 22:22:09 +0000 (23:22 +0100)
committerNiki Roo <niki@nikiroo.be>
Fri, 23 Mar 2018 22:22:09 +0000 (23:22 +0100)
src/be/nikiroo/gofetch/data/Story.java
src/be/nikiroo/gofetch/support/BasicSupport.java
src/be/nikiroo/gofetch/support/LWN.java
src/be/nikiroo/gofetch/support/LeMonde.java
src/be/nikiroo/gofetch/support/Pipedot.java
src/be/nikiroo/gofetch/support/Slashdot.java
src/be/nikiroo/gofetch/support/TheRegister.java
src/be/nikiroo/gofetch/support/TooLinux.java

index a2ad7d3b9e3a3660d18c951c6c3904b49a91c6b6..5944beccc56104692ad31bf1f1f3fd1fe01b8d52 100644 (file)
@@ -15,6 +15,9 @@ public class Story {
        private Type type;
        private String id;
        private String title;
+       private String author;
+       private String date;
+       private String category;
        private String details;
        private String urlInternal;
        private String urlExternal;
@@ -32,8 +35,15 @@ public class Story {
         *            the news ID
         * @param title
         *            the news title
+        * @param author
+        *            the author name for the details
+        * @param date
+        *            the post date for the details
+        * @param category
+        *            the category for the details
         * @param details
-        *            some details to add to the title
+        *            some details to add to the title (author, date and category
+        *            will be added in the getter if available)
         * @param urlInternal
         *            the {@link URL} to get this news on the associated news site
         * @param urlExternal
@@ -41,11 +51,15 @@ public class Story {
         * @param content
         *            the story content
         */
-       public Story(Type type, String id, String title, String details,
-                       String urlInternal, String urlExternal, String content) {
+       public Story(Type type, String id, String title, String author,
+                       String date, String category, String details, String urlInternal,
+                       String urlExternal, String content) {
                this.type = type;
                this.id = id;
                this.title = title;
+               this.author = author;
+               this.date = date;
+               this.category = category;
                this.details = details;
                this.urlInternal = urlInternal;
                this.urlExternal = urlExternal;
@@ -77,6 +91,17 @@ public class Story {
         * @return the details
         */
        public String getDetails() {
+               String details = "";
+
+               if (category != null && !category.trim().isEmpty())
+                       details += "[" + category + "] ";
+               if (date != null && !date.trim().isEmpty())
+                       details += date + " ";
+               if (author != null && !author.trim().isEmpty())
+                       details += "(" + this.author + ") ";
+               if (this.details != null && !this.details.trim().isEmpty())
+                       details += "\n" + this.details;
+
                return details;
        }
 
index 4067979d4dc1b3f1657189be91d85ec8cec345c0..6d930f6f4320bbeae13a14d098f846fa289fd96b 100644 (file)
@@ -1,7 +1,10 @@
 package be.nikiroo.gofetch.support;
 
 import java.io.IOException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
+import java.util.Date;
 import java.util.List;
 
 import org.jsoup.helper.StringUtil;
@@ -15,11 +18,33 @@ import org.jsoup.select.NodeVisitor;
 import be.nikiroo.gofetch.data.Story;
 import be.nikiroo.utils.Downloader;
 
+/**
+ * Base class for website support.
+ * 
+ * @author niki
+ */
 public abstract class BasicSupport {
+       /** The downloader to use for all websites. */
        protected static Downloader downloader = new Downloader("gofetcher");
 
+       /**
+        * The support type (each website we support has a single type).
+        * 
+        * @author niki
+        */
        public enum Type {
-               SLASHDOT, PIPEDOT, LWN, LEMONDE, REGISTER, TOOLINUX,
+               /** EN: Any, but mostly IT/Sci */
+               SLASHDOT,
+               /** EN: Clone of Slashdot, mostly abandoned */
+               PIPEDOT,
+               /** EN: Linux */
+               LWN,
+               /** FR: Any */
+               LEMONDE,
+               /** EN: IT */
+               REGISTER,
+               /** FR: Linux */
+               TOO_LINUX,
        }
 
        /**
@@ -43,7 +68,8 @@ public abstract class BasicSupport {
                 * 
                 * @param text
                 *            the text to process
-                * @return
+                * 
+                * @return the resulting text
                 */
                public String processText(String text);
 
@@ -130,21 +156,49 @@ public abstract class BasicSupport {
         */
        abstract public void fetch(Story story) throws IOException;
 
+       /**
+        * The website textual description, to add in the dispatcher page.
+        * <p>
+        * Should be short.
+        * 
+        * @return the description
+        */
        abstract public String getDescription();
 
+       /**
+        * The gopher "selector" to use for output.
+        * <p>
+        * A kind of "URL path", like "/news/" or "/misc/news/" or...
+        * 
+        * @return the selector
+        */
        public String getSelector() {
                return getSelector(type);
        }
 
+       /**
+        * The support type.
+        * 
+        * @return the type
+        */
        public Type getType() {
                return type;
        }
 
+       /**
+        * The support type.
+        * 
+        * @param type
+        *            the new type
+        */
        protected void setType(Type type) {
                this.type = type;
        }
 
        /**
+        * The {@link String} to prepend to the selector (the selector will be
+        * constructed as "this string" then "/type/").
+        * 
         * @param preselector
         *            the preselector to set
         */
@@ -181,7 +235,7 @@ public abstract class BasicSupport {
                        case REGISTER:
                                support = new TheRegister();
                                break;
-                       case TOOLINUX:
+                       case TOO_LINUX:
                                support = new TooLinux();
                                break;
                        }
@@ -194,6 +248,17 @@ public abstract class BasicSupport {
                return support;
        }
 
+       /**
+        * The gopher "selector" to use for output for this type, using the
+        * preselector.
+        * <p>
+        * A kind of "URL path", like "/news/" or "/misc/news/" or...
+        * 
+        * @param type
+        *            the type to get the selector of
+        * 
+        * @return the selector
+        */
        static public String getSelector(Type type) {
                return preselector + "/" + type + "/";
        }
@@ -256,7 +321,6 @@ public abstract class BasicSupport {
                final StringBuilder currentLine = new StringBuilder();
                final List<Integer> quoted = new ArrayList<Integer>();
                final List<Node> ignoredNodes = new ArrayList<Node>();
-               final List<String> footnotes = new ArrayList<String>();
 
                if (element != null) {
                        new NodeTraversor(new NodeVisitor() {
@@ -314,11 +378,6 @@ public abstract class BasicSupport {
                                                if (block && currentLine.length() > 0) {
                                                        currentLine.append("\n");
                                                }
-
-                                               if (!element.absUrl("href").trim().isEmpty()) {
-                                                       footnotes.add(element.absUrl("href"));
-                                                       currentLine.append("[" + footnotes.size() + "]");
-                                               }
                                        } else if (node instanceof TextNode) {
                                                TextNode textNode = (TextNode) node;
                                                String line = StringUtil.normaliseWhitespace(textNode
@@ -355,16 +414,37 @@ public abstract class BasicSupport {
                        lines.set(i, lines.get(i).replace("  ", " ").trim());
                }
 
-               if (footnotes.size() > 0) {
-                       lines.add("");
-                       lines.add("");
-                       lines.add("");
-                       lines.add("");
-                       for (int i = 0; i < footnotes.size(); i++) {
-                               lines.add("[" + (i + 1) + "] " + footnotes.get(i));
-                       }
+               return lines;
+       }
+
+       /**
+        * Reformat the date (epoch seconds or ISO 8601 with offset) if possible.
+        * 
+        * @param date
+        *            the input date (can be NULL)
+        * 
+        * @return the date as "yyyy/MM/dd", or the same value if it was not parsable
+        */
+       static protected String date(String date) {
+               SimpleDateFormat out = new SimpleDateFormat("yyyy/MM/dd");
+               String trimmed = date == null ? "" : date.trim();
+               long epoch = 0;
+               try {
+                       epoch = Long.parseLong(trimmed);
+               } catch (NumberFormatException e) {
+                       epoch = 0; // not an epoch, the ISO form is tried below
                }
 
-               return lines;
+               if (epoch > 0) {
+                       return out.format(new Date(1000 * epoch));
+               }
+
+               try {
+                       Date dat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssXXX")
+                                       .parse(trimmed);
+                       return out.format(dat);
+               } catch (ParseException e) {
+                       return date;
+               }
        }
 }
index 27b539c5e42ffa6bec08c5c4e689377bf3742068..37a5a8f2fefcf8a88b71228e3ee62db01b88a86a 100644 (file)
@@ -59,8 +59,16 @@ public class LWN extends BasicSupport {
                        }
                        body = body.trim();
 
+                       int pos;
+
+                       String categ = "";
+                       pos = details.indexOf("]");
+                       if (pos >= 0) {
+                               categ = details.substring(1, pos + 1).trim();
+                       }
+
                        String author = "";
-                       int pos = details.indexOf(" by ");
+                       pos = details.indexOf(" by ");
                        if (pos >= 0) {
                                author = details.substring(pos + " by ".length()).trim();
                        }
@@ -69,8 +77,15 @@ public class LWN extends BasicSupport {
                        pos = details.indexOf(" Posted ");
                        if (pos >= 0) {
                                date = details.substring(pos + " Posted ".length()).trim();
+                               pos = details.indexOf(" by ");
+                               if (pos >= 0) {
+                                       author = details.substring(0, pos).trim();
+                               }
                        }
 
+                       // We extracted everything from details so...
+                       details = "";
+
                        String id = "";
                        String intUrl = "";
                        String extUrl = "";
@@ -84,8 +99,8 @@ public class LWN extends BasicSupport {
                                id = intUrl.replaceAll("[^0-9]", "");
                        }
 
-                       list.add(new Story(getType(), id, title, details, intUrl, extUrl,
-                                       body));
+                       list.add(new Story(getType(), id, title, author, date, categ,
+                                       details, intUrl, extUrl, body));
                }
 
                return list;
index d11ba797f7767cb89e84793fb2234de2b5417b6d..4ec2c30f39a6fcb0b344ca4dd387d629f35c8dc0 100644 (file)
@@ -15,6 +15,11 @@ import org.jsoup.select.Elements;
 import be.nikiroo.gofetch.data.Comment;
 import be.nikiroo.gofetch.data.Story;
 
+/**
+ * Support <a href="http://www.lemonde.fr/">http://www.lemonde.fr/</a>.
+ * 
+ * @author niki
+ */
 public class LeMonde extends BasicSupport {
        @Override
        public String getDescription() {
@@ -39,24 +44,25 @@ public class LeMonde extends BasicSupport {
                                                && contentElements.size() > 0) {
                                        String id = times.get(0).attr("datetime").replace(":", "_")
                                                        .replace("+", "_");
-                                       String title = "[" + topic + "] "
-                                                       + titleElements.get(0).text();
+                                       String title = titleElements.get(0).text();
+                                       String date = date(titleElements.get(0).text());
                                        String content = contentElements.get(0).text();
                                        String intUrl = "";
                                        String extUrl = "";
+                                       String author = "";
                                        String details = "";
 
                                        Elements detailsElements = article
                                                        .getElementsByClass("signature");
                                        if (detailsElements.size() > 0) {
-                                               details = detailsElements.get(0).text();
+                                               author = detailsElements.get(0).text();
                                        }
 
                                        Elements links = titleElements.get(0).getElementsByTag("a");
                                        if (links.size() > 0) {
                                                intUrl = links.get(0).absUrl("href");
-                                               list.add(new Story(getType(), id, title, details,
-                                                               intUrl, extUrl, content));
+                                               list.add(new Story(getType(), id, title, author, date,
+                                                               topic, details, intUrl, extUrl, content));
                                        }
                                }
                        }
index 17388b21030f5cbdbbd12518b9b0b1023594fff5..edbb8047df8f57b18661ff8b15a0ffb62e0f70f2 100644 (file)
@@ -82,8 +82,8 @@ public class Pipedot extends BasicSupport {
                                }
                        }
 
-                       list.add(new Story(getType(), id, title.text(), details, intUrl,
-                                       extUrl, body));
+                       list.add(new Story(getType(), id, title.text(), "", "", "",
+                                       details, intUrl, extUrl, body));
                }
 
                return list;
index 43d35f4afa13281d8565da54e113fab82529f35f..4746cc2eacfb28b41d3983ae6baddd34e5939dd0 100644 (file)
@@ -14,6 +14,7 @@ import org.jsoup.select.Elements;
 
 import be.nikiroo.gofetch.data.Comment;
 import be.nikiroo.gofetch.data.Story;
+import be.nikiroo.utils.StringUtils;
 
 /**
  * Support <a href='https://slashdot.org/'>https://slashdot.org/</a>.
@@ -63,14 +64,38 @@ public class Slashdot extends BasicSupport {
                                details = detailsElements.get(0).text();
                        }
 
+                       // details:
+                       // "Posted by AUTHOR on DATE from the further-crackdown dept."
+                       String author = "";
+                       int pos = details.indexOf(" on ");
+                       if (details.startsWith("Posted by ") && pos >= 0) {
+                               author = details.substring("Posted by ".length(), pos).trim();
+                       }
+                       pos = details.indexOf(" from the ");
+                       if (pos >= 0) {
+                               details = details.substring(pos).trim();
+                       }
+
                        String body = "";
                        Element bodyElement = doc.getElementById("text-" + id);
                        if (bodyElement != null) {
                                body = bodyElement.text();
                        }
 
-                       list.add(new Story(getType(), id, title.text(), details, intUrl,
-                                       extUrl, body));
+                       String categ = "";
+                       Element categElement = doc.getElementsByClass("topic").first();
+                       if (categElement != null) {
+                               categ = StringUtils.unhtml(categElement.text()).trim();
+                       }
+
+                       String date = "";
+                       Element dateElement = doc.getElementsByTag("time").first();
+                       if (dateElement != null) {
+                               date = StringUtils.unhtml(dateElement.text()).trim();
+                       }
+
+                       list.add(new Story(getType(), id, title.text(), author, date,
+                                       categ, details, intUrl, extUrl, body));
                }
 
                return list;
index 3d7496ab65c53f0d9d489b8f52fcadeb6512cce6..5903eaa64a2b8512712eb623290deed4db244db5 100644 (file)
@@ -3,9 +3,7 @@ package be.nikiroo.gofetch.support;
 import java.io.IOException;
 import java.io.InputStream;
 import java.net.URL;
-import java.text.SimpleDateFormat;
 import java.util.ArrayList;
-import java.util.Date;
 import java.util.List;
 
 import org.jsoup.helper.DataUtil;
@@ -18,6 +16,12 @@ import be.nikiroo.gofetch.data.Comment;
 import be.nikiroo.gofetch.data.Story;
 import be.nikiroo.utils.StringUtils;
 
+/**
+ * Support <a
+ * href="https://www.theregister.co.uk/">https://www.theregister.co.uk/</a>.
+ * 
+ * @author niki
+ */
 public class TheRegister extends BasicSupport {
        @Override
        public String getDescription() {
@@ -46,17 +50,18 @@ public class TheRegister extends BasicSupport {
                        String date = "";
                        String details = "";
                        String body = "";
+                       String categ = "";
+                       String author = ""; // nope
 
-                       String topic = "";
-                       Element topicElement = article.previousElementSibling();
-                       if (topicElement != null) {
-                               topic = "[" + topicElement.text().trim() + "] ";
+                       Element categElement = article.previousElementSibling();
+                       if (categElement != null) {
+                               categ = categElement.text().trim();
                        }
+
                        Element titleElement = article.getElementsByTag("h4").first();
                        if (titleElement != null) {
                                title = StringUtils.unhtml(titleElement.text()).trim();
                        }
-                       title = topic + title;
 
                        Element dateElement = article.getElementsByClass("time_stamp")
                                        .first();
@@ -80,8 +85,8 @@ public class TheRegister extends BasicSupport {
                                details += StringUtils.unhtml(detailsElement.text()).trim();
                        }
 
-                       list.add(new Story(getType(), id, title, details, intUrl, extUrl,
-                                       body));
+                       list.add(new Story(getType(), id, title, author, date, categ,
+                                       details, intUrl, extUrl, body));
                }
 
                return list;
@@ -206,21 +211,4 @@ public class TheRegister extends BasicSupport {
                        }
                }
        }
-
-       // Return display date from epoch String, or "" if error
-       private static String date(String epochString) {
-               long epoch = 0;
-               try {
-                       epoch = Long.parseLong(epochString);
-               } catch (Exception e) {
-                       epoch = 0;
-               }
-
-               if (epoch > 0) {
-                       return new SimpleDateFormat("dd MMM YYYY").format(new Date(
-                                       1000 * epoch));
-               }
-
-               return "";
-       }
 }
index c875783def6c450bedd7c4c306778461e808274c..806133128eb2ec56111e0828a46ede739637d4ee 100644 (file)
@@ -16,10 +16,15 @@ import be.nikiroo.gofetch.data.Comment;
 import be.nikiroo.gofetch.data.Story;
 import be.nikiroo.utils.StringUtils;
 
+/**
+ * Support <a href="https://www.toolinux.com/">https://www.toolinux.com/</a>.
+ * 
+ * @author niki
+ */
 public class TooLinux extends BasicSupport {
        @Override
        public String getDescription() {
-               return "TooLinux: Premier quotidien francophone d'actualité généraliste sur Linux, les logiciels libres et l'interopérabilité, depuis mars 2000.";
+               return "TooLinux: Actualité généraliste sur Linux et les logiciels libres";
        }
 
        @Override
@@ -38,6 +43,8 @@ public class TooLinux extends BasicSupport {
                        String date = "";
                        String details = "";
                        String body = "";
+                       String author = ""; // nope
+                       String categ = ""; // nope
 
                        Element urlElement = article.getElementsByTag("a").first();
                        if (urlElement != null) {
@@ -64,13 +71,12 @@ public class TooLinux extends BasicSupport {
 
                        Element detailsElement = article.getElementsByClass("introduction")
                                        .first();
-                       details = "(" + date + ") ";
                        if (detailsElement != null) {
                                details += StringUtils.unhtml(detailsElement.text()).trim();
                        }
 
-                       list.add(new Story(getType(), id, title, details, intUrl, extUrl,
-                                       body));
+                       list.add(new Story(getType(), id, title, author, date, categ,
+                                       details, intUrl, extUrl, body));
                }
 
                return list;