Fix subtitles and too much content in EreNumerique
[gofetch.git] / src / be / nikiroo / gofetch / support / BasicSupport.java
index f3348e366f1ceeece3c6bec39ea01f031ffe8c95..b15fac7e5e2598d0d67c3bcf493c6dae03a0a8a1 100644 (file)
@@ -1,12 +1,11 @@
 package be.nikiroo.gofetch.support;
 
 import java.io.IOException;
-import java.io.InputStream;
-import java.net.URL;
-import java.net.URLConnection;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
+import java.util.Date;
 import java.util.List;
-import java.util.zip.GZIPInputStream;
 
 import org.jsoup.helper.StringUtil;
 import org.jsoup.nodes.Element;
@@ -17,10 +16,37 @@ import org.jsoup.select.NodeTraversor;
 import org.jsoup.select.NodeVisitor;
 
 import be.nikiroo.gofetch.data.Story;
+import be.nikiroo.utils.Downloader;
 
+/**
+ * Base class for website support.
+ * 
+ * @author niki
+ */
 public abstract class BasicSupport {
+       /** The downloader to use for all websites. */
+       protected static Downloader downloader = new Downloader("gofetcher");
+
+       /**
+        * The support type (each website we support has a single type).
+        * 
+        * @author niki
+        */
        public enum Type {
-               SLASHDOT, PIPEDOT, LWN, LEMONDE,
+               /** EN: Any, but mostly IT/Sci */
+               SLASHDOT,
+               /** EN: Clone of Slashdot, mostly abandoned */
+               PIPEDOT,
+               /** EN: Linux */
+               LWN,
+               /** FR: Any */
+               LEMONDE,
+               /** EN: IT */
+               REGISTER,
+               /** FR: Linux */
+               TOO_LINUX,
+               /** FR: IT */
+               ERE_NUMERIQUE,
        }
 
        /**
@@ -44,7 +70,8 @@ public abstract class BasicSupport {
                 * 
                 * @param text
                 *            the text to process
-                * @return
+                * 
+                * @return the resulting text
                 */
                public String processText(String text);
 
@@ -73,6 +100,17 @@ public abstract class BasicSupport {
                 *         the usual automatic processing if not NULL
                 */
                public String manualProcessing(Node node);
+
+               /**
+                * Check whether this {@link Node} is a subtitle that should be treated
+                * as such (highlighted).
+                * 
+                * @param node
+                *            the node to check
+                * 
+                * @return NULL if it is not a subtitle, the subtitle to use if it is
+                */
+               public String isSubtitle(Node node);
        }
 
        /**
@@ -101,6 +139,11 @@ public abstract class BasicSupport {
                public String manualProcessing(Node node) {
                        return null;
                }
+
+               @Override
+               public String isSubtitle(Node node) {
+                       return null;
+               }
        }
 
        static private String preselector;
@@ -131,21 +174,49 @@ public abstract class BasicSupport {
         */
        abstract public void fetch(Story story) throws IOException;
 
+       /**
+        * The website textual description, to add in the dispatcher page.
+        * <p>
+        * Should be short.
+        * 
+        * @return the description
+        */
        abstract public String getDescription();
 
+       /**
+        * The gopher "selector" to use for output.
+        * <p>
+        * A kind of "URL path", like "/news/" or "/misc/news/" or...
+        * 
+        * @return the selector
+        */
        public String getSelector() {
                return getSelector(type);
        }
 
+       /**
+        * The support type.
+        * 
+        * @return the type
+        */
        public Type getType() {
                return type;
        }
 
+       /**
+        * The support type.
+        * 
+        * @param type
+        *            the new type
+        */
        protected void setType(Type type) {
                this.type = type;
        }
 
        /**
+        * The {@link String} to prepend to the selector (the selector will be
+        * constructed as "this string" then "/type/").
+        * 
         * @param preselector
         *            the preselector to set
         */
@@ -179,6 +250,15 @@ public abstract class BasicSupport {
                        case LEMONDE:
                                support = new LeMonde();
                                break;
+                       case REGISTER:
+                               support = new TheRegister();
+                               break;
+                       case TOO_LINUX:
+                               support = new TooLinux();
+                               break;
+                       case ERE_NUMERIQUE:
+                               support = new EreNumerique();
+                               break;
                        }
 
                        if (support != null) {
@@ -189,22 +269,21 @@ public abstract class BasicSupport {
                return support;
        }
 
+       /**
+        * The gopher "selector" to use for output for this type, using the
+        * preselector.
+        * <p>
+        * A kind of "URL path", like "/news/" or "/misc/news/" or...
+        * 
+        * @param type
+        *            the type to get the selector of
+        * 
+        * @return the selector
+        */
        static public String getSelector(Type type) {
                return preselector + "/" + type + "/";
        }
 
-       // TODO: check Downloader.java?
-       static protected InputStream open(URL url) throws IOException {
-               URLConnection conn = url.openConnection();
-               conn.connect();
-               InputStream in = conn.getInputStream();
-               if ("gzip".equals(conn.getContentEncoding())) {
-                       in = new GZIPInputStream(in);
-               }
-
-               return in;
-       }
-
        /**
         * Get the first {@link Element} of the given class, or an empty span
         * {@link Element} if none found.
@@ -271,6 +350,7 @@ public abstract class BasicSupport {
                                        String manual = null;
                                        boolean ignore = elementProcessor.ignoreNode(node)
                                                        || ignoredNodes.contains(node.parentNode());
+                                       // Manual processing
                                        if (!ignore) {
                                                manual = elementProcessor.manualProcessing(node);
                                                if (manual != null) {
@@ -279,6 +359,16 @@ public abstract class BasicSupport {
                                                }
                                        }
 
+                                       // Subtitle check
+                                       if (!ignore) {
+                                               String subtitle = elementProcessor.isSubtitle(node);
+                                               if (subtitle != null) {
+                                                       subtitle = subtitle.trim();
+                                                       currentLine.append("\n[ " + subtitle + " ]\n");
+                                                       ignore = true;
+                                               }
+                                       }
+
                                        if (ignore) {
                                                ignoredNodes.add(node);
                                                return;
@@ -358,4 +448,35 @@ public abstract class BasicSupport {
 
                return lines;
        }
+
+       /**
+        * Reformat the date (epoch seconds or ISO-8601) to "yyyy/MM/dd" if possible.
+        * 
+        * @param date
+        *            the input date (can be NULL)
+        * 
+        * @return the reformatted date, or the same value if it was not parsable
+        */
+       static protected String date(String date) {
+               if (date == null) {
+                       return null; // nothing to reformat, and trim() would throw
+               }
+               String raw = date.trim();
+               SimpleDateFormat out = new SimpleDateFormat("yyyy/MM/dd");
+               try {
+                       long epoch = Long.parseLong(raw);
+                       if (epoch > 0) {
+                               return out.format(new Date(1000 * epoch));
+                       }
+               } catch (NumberFormatException e) {
+                       // not an epoch in seconds: try the ISO-8601 form below
+               }
+
+               try {
+                       return out.format(new SimpleDateFormat(
+                                       "yyyy-MM-dd'T'HH:mm:ssXXX").parse(raw));
+               } catch (ParseException e) {
+                       return date;
+               }
+       }
 }