import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.jsoup.helper.DataUtil;
*/
public abstract class BasicSupport {
/**
- * The downloader to use for all websites via {@link BasicSupport#open(URL)}
+ * The downloader to use for all web sites via
+ * {@link BasicSupport#open(URL)}
*/
static private Downloader downloader = new Downloader("gofetcher");
static private String preselector;
+ /**
+ * The optional cookies to use to get the site data.
+ */
+ private Map<String, String> cookies = new HashMap<String, String>();
+
private Type type;
+ /**
+ * Login on the web site (this method does nothing by default, but can be
+ * overridden if needed).
+ *
+ * @throws IOException
+ * in case of I/O error
+ *
+ */
+ public void login() throws IOException {
+ }
+
/**
* The website textual description, to add in the dispatcher page.
* <p>
public List<Story> list() throws IOException {
List<Story> list = new ArrayList<Story>();
+ login();
for (Entry<URL, String> entry : getUrls()) {
URL url = entry.getKey();
String defaultCateg = entry.getValue();
Document doc = DataUtil.load(in, "UTF-8", url.toString());
Element article = getFullArticle(doc);
if (article != null) {
- StringBuilder builder = new StringBuilder();
- ElementProcessor eProc = getElementProcessorFullArticle();
- if (eProc != null) {
- for (String line : toLines(article, eProc)) {
- builder.append(line + "\n");
- }
- } else {
- builder.append(article.text());
- }
-
- // Content is too tight with a single break per line:
- fullContent = builder.toString().replace("\n", "\n\n") //
- .replace("\n\n\n\n", "\n\n") //
- .replace("\n\n\n\n", "\n\n") //
- .trim();
+ fullContent = getArticleText(article);
}
if (fullContent.isEmpty()) {
}
/**
- * Return the full article if available.
+ * Return the text from this {@link Element}, using the
+ * {@link BasicSupport#getElementProcessorFullArticle()} processor logic.
+ *
+ * @param article
+ * the element to extract the text from
+ *
+ * @return the text
+ */
+ protected String getArticleText(Element article) {
+ StringBuilder builder = new StringBuilder();
+ ElementProcessor eProc = getElementProcessorFullArticle();
+ if (eProc != null) {
+ for (String line : toLines(article, eProc)) {
+ builder.append(line + "\n");
+ }
+ } else {
+ builder.append(article.text());
+ }
+
+ // Content is too tight with a single break per line:
+ return builder.toString().replace("\n", "\n\n") //
+ .replace("\n\n\n\n", "\n\n") //
+ .replace("\n\n\n\n", "\n\n") //
+ .trim();
+ }
+
+ /**
+ * Return the full article if available (this is the article to retrieve
+ * from the newly downloaded page at {@link Story#getUrlInternal()}).
*
* @param doc
* the (full article) document to work on
* in case of I/O error
*/
protected InputStream open(URL url) throws IOException {
- return downloader.open(url);
+ return downloader.open(url, url, cookies, null, null, null);
}
/**
this.type = type;
}
+ /**
+ * Add a cookie for all site connections.
+ *
+ * @param name
+ * the cookie name
+ * @param value
+ * the value
+ */
+ protected void addCookie(String name, String value) {
+ cookies.put(name, value);
+ }
+
/**
* The {@link String} to append to the selector (the selector will be
* constructed as "this string" then "/type/".
case PHORONIX:
support = new Phoronix();
break;
+ case SEPT_SUR_SEPT:
+ support = new SeptSurSept();
+ break;
}
if (support != null) {