Reddit test: add expected files

[gofetch.git] / src / be / nikiroo / gofetch / support / BasicSupport.java
diff --git a/src/be/nikiroo/gofetch/support/BasicSupport.java b/src/be/nikiroo/gofetch/support/BasicSupport.java

index 972852333fed4707ecd9ed2812e45c7f2ee00f8b..17a3c151750e9c7e56169fe4fd8be87b9eb39855 100644 (file)
--- a/src/be/nikiroo/gofetch/support/BasicSupport.java
+++ b/src/be/nikiroo/gofetch/support/BasicSupport.java
@@ -8,7 +8,9 @@ import java.text.SimpleDateFormat;
  import java.util.ArrayList;
  import java.util.Arrays;
  import java.util.Date;
+import java.util.HashMap;
  import java.util.List;
+import java.util.Map;
  import java.util.Map.Entry;
  
  import org.jsoup.helper.DataUtil;
@@ -31,13 +33,32 @@ import be.nikiroo.utils.StringUtils;
   * @author niki
   */
  public abstract class BasicSupport {
-       /** The downloader to use for all websites. */
-       static protected Downloader downloader = new Downloader("gofetcher");
+       /**
+        * The downloader to use for all web sites via
+        * {@link BasicSupport#open(URL)}
+        */
+       static private Downloader downloader = new Downloader("gofetcher");
  
         static private String preselector;
  
+       /**
+        * The optional cookies to use to get the site data.
+        */
+       private Map<String, String> cookies = new HashMap<String, String>();
+
         private Type type;
  
+       /**
+        * Log in to the web site; {@link BasicSupport#list()} calls this once
+        * before it fetches any URL. Does nothing by default: override if needed.
+        * 
+        * @throws IOException
+        *             in case of I/O error
+        * 
+        */
+       public void login() throws IOException {
+       }
+
         /**
          * The website textual description, to add in the dispatcher page.
          * <p>
@@ -55,7 +76,7 @@ public abstract class BasicSupport {
          * @return the selector
          */
         public String getSelector() {
-               return getSelector(type);
+               return getSelector(getType());
         }
  
         /**
@@ -80,6 +101,7 @@ public abstract class BasicSupport {
         public List<Story> list() throws IOException {
                 List<Story> list = new ArrayList<Story>();
  
+               login();
                 for (Entry<URL, String> entry : getUrls()) {
                         URL url = entry.getKey();
                         String defaultCateg = entry.getValue();
@@ -87,7 +109,7 @@ public abstract class BasicSupport {
                                 defaultCateg = "";
                         }
  
-                       InputStream in = downloader.open(url);
+                       InputStream in = open(url);
                         Document doc = DataUtil.load(in, "UTF-8", url.toString());
                         List<Element> articles = getArticles(doc);
                         for (Element article : articles) {
@@ -111,9 +133,9 @@ public abstract class BasicSupport {
                                                 id = "0" + id;
                                         }
                                 } else {
-                                       id = date.replace(":", "_").replace("+", "_");
+                                       id = date.replace(":", "_").replace("+", "_").replace("/", "-");
                                 }
-
+                               
                                 date = date(date);
  
                                 list.add(new Story(getType(), id, title, author, date, categ,
@@ -273,26 +295,12 @@ public abstract class BasicSupport {
                 String fullContent = "";
  
                 URL url = new URL(story.getUrlInternal());
-               InputStream in = downloader.open(url);
+               InputStream in = open(url);
                 try {
                         Document doc = DataUtil.load(in, "UTF-8", url.toString());
                         Element article = getFullArticle(doc);
                         if (article != null) {
-                               StringBuilder builder = new StringBuilder();
-                               ElementProcessor eProc = getElementProcessorFullArticle();
-                               if (eProc != null) {
-                                       for (String line : toLines(article, eProc)) {
-                                               builder.append(line + "\n");
-                                       }
-                               } else {
-                                       builder.append(article.text());
-                               }
-
-                               // Content is too tight with a single break per line:
-                               fullContent = builder.toString().replace("\n", "\n\n") //
-                                               .replace("\n\n\n\n", "\n\n") //
-                                               .replace("\n\n\n\n", "\n\n") //
-                                               .trim();
+                               fullContent = getArticleText(article);
                         }
  
                         if (fullContent.isEmpty()) {
@@ -310,7 +318,35 @@ public abstract class BasicSupport {
         }
  
         /**
-        * Return the full article if available.
+        * Return the text from this {@link Element}, using the
+        * {@link BasicSupport#getElementProcessorFullArticle()} processor logic.
+        * 
+        * @param article
+        *            the element to extract the text from
+        * 
+        * @return the text
+        */
+       protected String getArticleText(Element article) {
+               StringBuilder builder = new StringBuilder();
+               ElementProcessor eProc = getElementProcessorFullArticle();
+               if (eProc != null) {
+                       for (String line : toLines(article, eProc)) {
+                               builder.append(line + "\n");
+                       }
+               } else {
+                       builder.append(article.text());
+               }
+
+               // Too tight with one break per line: double each, then 4 -> 2 (twice):
+               return builder.toString().replace("\n", "\n\n") //
+                               .replace("\n\n\n\n", "\n\n") //
+                               .replace("\n\n\n\n", "\n\n") //
+                               .trim();
+       }
+
+       /**
+        * Return the full article if available (this is the article to retrieve
+        * from the newly downloaded page at {@link Story#getUrlInternal()}).
          * 
          * @param doc
          *            the (full article) document to work on
@@ -346,6 +382,23 @@ public abstract class BasicSupport {
          */
         abstract protected ElementProcessor getElementProcessorFullArticle();
  
+       /**
+        * Open a network resource via the shared downloader and our cookies.
+        * <p>
+        * You need to close the returned {@link InputStream} when done.
+        * 
+        * @param url
+        *            the source to open
+        * 
+        * @return the stream to read the content from
+        * 
+        * @throws IOException
+        *             in case of I/O error
+        */
+       protected InputStream open(URL url) throws IOException {
+               return downloader.open(url, url, cookies, null, null, null);
+       }
+
         /**
          * Convert the comment elements into {@link Comment}s
          * 
@@ -484,6 +537,18 @@ public abstract class BasicSupport {
                 this.type = type;
         }
  
+       /**
+        * Add a cookie to pass to every {@link BasicSupport#open(URL)} call.
+        * 
+        * @param name
+        *            the cookie name (an existing cookie of that name is replaced)
+        * @param value
+        *            the cookie value
+        */
+       protected void addCookie(String name, String value) {
+               cookies.put(name, value);
+       }
+
         /**
          * The {@link String} to append to the selector (the selector will be
          * constructed as "this string" then "/type/".
@@ -533,6 +598,12 @@ public abstract class BasicSupport {
                         case PHORONIX:
                                 support = new Phoronix();
                                 break;
+                       case SEPT_SUR_SEPT:
+                               support = new SeptSurSept();
+                               break;
+                       case REDDIT:
+                               support = new Reddit();
+                               break;
                         }
  
                         if (support != null) {
@@ -751,7 +822,7 @@ public abstract class BasicSupport {
                         Date dat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssXXX")
                                         .parse(date.trim());
                         return out.format(dat);
-               } catch (ParseException e) {
+               } catch (Exception e) {
                         return date;
                 }
         }