Add support for 7sur7.be
author Niki Roo <niki@nikiroo.be>
Wed, 26 Sep 2018 18:02:19 +0000 (20:02 +0200)
committer Niki Roo <niki@nikiroo.be>
Wed, 26 Sep 2018 18:02:19 +0000 (20:02 +0200)
src/be/nikiroo/gofetch/Main.java
src/be/nikiroo/gofetch/support/BasicSupport.java
src/be/nikiroo/gofetch/support/SeptSurSept.java [new file with mode: 0644]
src/be/nikiroo/gofetch/support/Type.java
src/be/nikiroo/gofetch/test/Test.java
src/be/nikiroo/gofetch/test/TestSeptSurSept.java [new file with mode: 0644]

index e4078d8d1009e454dfcea46ea3907ce45212a7ca..1672c88ef5ae140317760d98798ea7f775bcc1b1 100644 (file)
@@ -105,4 +105,4 @@ public class Main {
 
                new Fetcher(dir, preselector, type, maxStories, hostname, port).start();
        }
-}
\ No newline at end of file
+}
index a748262857debc001dcb07cd557a7563a007e3d7..dcd5e6ea295ad11f18372035d7b9033b1b7c5ba0 100644 (file)
@@ -8,7 +8,9 @@ import java.text.SimpleDateFormat;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Date;
+import java.util.HashMap;
 import java.util.List;
+import java.util.Map;
 import java.util.Map.Entry;
 
 import org.jsoup.helper.DataUtil;
@@ -32,14 +34,31 @@ import be.nikiroo.utils.StringUtils;
  */
 public abstract class BasicSupport {
        /**
-        * The downloader to use for all websites via {@link BasicSupport#open(URL)}
+        * The downloader to use for all web sites via
+        * {@link BasicSupport#open(URL)}
         */
        static private Downloader downloader = new Downloader("gofetcher");
 
        static private String preselector;
 
+       /**
+        * The optional cookies to use to get the site data.
+        */
+       private Map<String, String> cookies = new HashMap<String, String>();
+
        private Type type;
 
+       /**
+        * Login on the web site (this method does nothing by default, but can be
+        * overridden if needed).
+        * <p>
+        * It is called by {@link BasicSupport#list()} before the URLs of the site
+        * are processed.
+        *
+        * @throws IOException
+        *             in case of I/O error
+        */
+       public void login() throws IOException {
+       }
+
        /**
         * The website textual description, to add in the dispatcher page.
         * <p>
@@ -82,6 +101,7 @@ public abstract class BasicSupport {
        public List<Story> list() throws IOException {
                List<Story> list = new ArrayList<Story>();
 
+               login();
                for (Entry<URL, String> entry : getUrls()) {
                        URL url = entry.getKey();
                        String defaultCateg = entry.getValue();
@@ -312,7 +332,8 @@ public abstract class BasicSupport {
        }
 
        /**
-        * Return the full article if available.
+        * Return the full article if available (this is the article to retrieve
+        * from the newly downloaded page at {@link Story#getUrlInternal()}).
         * 
         * @param doc
         *            the (full article) document to work on
@@ -362,7 +383,7 @@ public abstract class BasicSupport {
         *             in case of I/O error
         */
        protected InputStream open(URL url) throws IOException {
-               return downloader.open(url);
+               return downloader.open(url, url, cookies, null, null, null);
        }
 
        /**
@@ -503,6 +524,18 @@ public abstract class BasicSupport {
                this.type = type;
        }
 
+       /**
+        * Register a cookie that will be passed to the downloader for every
+        * connection opened via {@link BasicSupport#open(URL)}.
+        * <p>
+        * Registering a cookie under an already known name replaces the previous
+        * value.
+        *
+        * @param name
+        *            the name of the cookie
+        * @param value
+        *            the value of the cookie
+        */
+       protected void addCookie(String name, String value) {
+               this.cookies.put(name, value);
+       }
+
        /**
         * The {@link String} to append to the selector (the selector will be
         * constructed as "this string" then "/type/".
@@ -552,6 +585,9 @@ public abstract class BasicSupport {
                        case PHORONIX:
                                support = new Phoronix();
                                break;
+                       case SEPT_SUR_SEPT:
+                               support = new SeptSurSept();
+                               break;
                        }
 
                        if (support != null) {
diff --git a/src/be/nikiroo/gofetch/support/SeptSurSept.java b/src/be/nikiroo/gofetch/support/SeptSurSept.java
new file mode 100644 (file)
index 0000000..6134f57
--- /dev/null
@@ -0,0 +1,232 @@
+package be.nikiroo.gofetch.support;
+
+import java.io.IOException;
+import java.net.URL;
+import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map.Entry;
+
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
+
+/**
+ * Support <a href="https://www.7sur7.be/">https://www.7sur7.be/</a>.
+ * <p>
+ * The stories are read from the elements of class "clip" found on the front
+ * page; most of their meta-data comes from the attributes of those elements.
+ *
+ * @author niki
+ */
+public class SeptSurSept extends BasicSupport {
+       /**
+        * The textual description of this web site.
+        *
+        * @return the description
+        */
+       @Override
+       public String getDescription() {
+               return "7SUR7.be Info, sport et showbiz, 24/24, 7/7";
+       }
+
+       /**
+        * Register the cookies to use for all the connections to this site, then
+        * request the privacy callback page of the site once.
+        * <p>
+        * NOTE(review): this presumably records the privacy/cookie consent so
+        * that the real pages are served afterwards -- confirm against the live
+        * site.
+        *
+        * @throws IOException
+        *             in case of I/O error
+        */
+       @Override
+       public void login() throws IOException {
+               addCookie("pwrefr2", "");
+               addCookie("pwv-atXMVFeyFP1Ki09i", "1");
+               addCookie("pwg-atXMVFeyFP1Ki09i", "basic");
+
+               addCookie("pwv", "1");
+               addCookie("pw", "functional");
+
+               URL url = new URL("https://www.7sur7.be/7s7/privacy/callback.do"
+                               + "?redirectUri=/" + "&pwv=1" + "&pws=functional%7Canalytics"
+                               + "&days=3650" + "&referrer=");
+
+               // Only the request itself matters, the content is not used
+               open(url).close();
+       }
+
+       /**
+        * The URLs to process: only the front page, with an empty default
+        * category.
+        *
+        * @return the list of URLs with their default category
+        *
+        * @throws IOException
+        *             in case of I/O error
+        */
+       @Override
+       protected List<Entry<URL, String>> getUrls() throws IOException {
+               List<Entry<URL, String>> urls = new ArrayList<Entry<URL, String>>();
+               URL url = new URL("https://www.7sur7.be/");
+               urls.add(new AbstractMap.SimpleEntry<URL, String>(url, ""));
+
+               return urls;
+       }
+
+       /**
+        * The article elements of the page: all the elements of class "clip".
+        *
+        * @param doc
+        *            the page to look into
+        *
+        * @return the article elements
+        */
+       @Override
+       protected List<Element> getArticles(Document doc) {
+               return doc.getElementsByClass("clip");
+       }
+
+       /**
+        * The article ID: the "id" attribute of the article element without its
+        * "clip" prefix.
+        *
+        * @param doc
+        *            the page the article comes from
+        * @param article
+        *            the article element
+        *
+        * @return the ID, or NULL if the "id" attribute does not start with "clip"
+        */
+       @Override
+       protected String getArticleId(Document doc, Element article) {
+               String id = article.attr("id");
+               if (id != null && id.startsWith("clip")) {
+                       return id.substring("clip".length());
+               }
+
+               return null;
+       }
+
+       /**
+        * The article title, from the "data-title" attribute.
+        *
+        * @param doc
+        *            the page the article comes from
+        * @param article
+        *            the article element
+        *
+        * @return the title
+        */
+       @Override
+       protected String getArticleTitle(Document doc, Element article) {
+               return article.attr("data-title");
+       }
+
+       /**
+        * The article author (always empty for this site).
+        *
+        * @param doc
+        *            the page the article comes from
+        * @param article
+        *            the article element
+        *
+        * @return an empty {@link String}
+        */
+       @Override
+       protected String getArticleAuthor(Document doc, Element article) {
+               return "";
+       }
+
+       /**
+        * The article date, from the "data-date" attribute.
+        *
+        * @param doc
+        *            the page the article comes from
+        * @param article
+        *            the article element
+        *
+        * @return the date, as found in the attribute
+        */
+       @Override
+       protected String getArticleDate(Document doc, Element article) {
+               return article.attr("data-date");
+       }
+
+       /**
+        * The article category: the text of the element that comes just before
+        * the parent of the article element.
+        *
+        * @param doc
+        *            the page the article comes from
+        * @param article
+        *            the article element
+        * @param currentCategory
+        *            the default category (not used here)
+        *
+        * @return the category, or an empty {@link String} if not found
+        */
+       @Override
+       protected String getArticleCategory(Document doc, Element article,
+                       String currentCategory) {
+               Element parent = article.parent();
+               if (parent != null) {
+                       Element catElement = parent.previousElementSibling();
+                       if (catElement != null) {
+                               return catElement.text();
+                       }
+               }
+
+               return "";
+       }
+
+       /**
+        * The article details (always empty for this site).
+        *
+        * @param doc
+        *            the page the article comes from
+        * @param article
+        *            the article element
+        *
+        * @return an empty {@link String}
+        */
+       @Override
+       protected String getArticleDetails(Document doc, Element article) {
+               return "";
+       }
+
+       /**
+        * The internal URL of the article: the absolute form of the "href"
+        * attribute of the article element.
+        *
+        * @param doc
+        *            the page the article comes from
+        * @param article
+        *            the article element
+        *
+        * @return the internal URL
+        */
+       @Override
+       protected String getArticleIntUrl(Document doc, Element article) {
+               return article.absUrl("href");
+       }
+
+       /**
+        * The external URL of the article (always empty for this site).
+        *
+        * @param doc
+        *            the page the article comes from
+        * @param article
+        *            the article element
+        *
+        * @return an empty {@link String}
+        */
+       @Override
+       protected String getArticleExtUrl(Document doc, Element article) {
+               return "";
+       }
+
+       /**
+        * The short content of the article: the trimmed "data-intro" attribute.
+        *
+        * @param doc
+        *            the page the article comes from
+        * @param article
+        *            the article element
+        *
+        * @return the short content
+        */
+       @Override
+       protected String getArticleContent(Document doc, Element article) {
+               return article.attr("data-intro").trim();
+       }
+
+       /**
+        * The full article: the element of ID "detail_content".
+        *
+        * @param doc
+        *            the (full article) document to work on
+        *
+        * @return the element, or NULL if there is no such element
+        */
+       @Override
+       protected Element getFullArticle(Document doc) {
+               return doc.getElementById("detail_content");
+       }
+
+       /**
+        * The top-level comments of the article: the direct "comment" children
+        * of the first element of class "comment-list".
+        *
+        * @param doc
+        *            the (full article) document to work on
+        * @param intUrl
+        *            the internal URL of the article (not used here)
+        *
+        * @return the comment elements (can be empty, never NULL)
+        */
+       @Override
+       protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) {
+               return getSubCommentElements(doc.getElementsByClass("comment-list")
+                               .first());
+       }
+
+       /**
+        * The processor used on the full article: it ignores the nodes whose
+        * class contains "chapo" and reports the heading elements as subtitles.
+        *
+        * @return the processor
+        */
+       @Override
+       protected ElementProcessor getElementProcessorFullArticle() {
+               return new BasicElementProcessor() {
+                       @Override
+                       public boolean ignoreNode(Node node) {
+                               return node.attr("class").contains("chapo");
+                       }
+
+                       @Override
+                       public String isSubtitle(Node node) {
+                               if (node instanceof Element) {
+                                       Element element = (Element) node;
+                                       String tag = element.tagName();
+                                       // "h" followed by a digit (h1, h2...): other 2-letter
+                                       // tags starting with "h", like "hr", are not subtitles
+                                       if (tag.length() == 2 && tag.charAt(0) == 'h'
+                                                       && Character.isDigit(tag.charAt(1))) {
+                                               return element.text();
+                                       }
+                               }
+                               return null;
+                       }
+               };
+       }
+
+       /**
+        * The answers to a comment: the direct "comment" children of the first
+        * element of class "children" found in the given container.
+        *
+        * @param doc
+        *            the (full article) document to work on (not used here)
+        * @param container
+        *            the comment to look into
+        *
+        * @return the comment elements (can be empty, never NULL)
+        */
+       @Override
+       protected List<Element> getCommentCommentPosts(Document doc,
+                       Element container) {
+               return getSubCommentElements(container.getElementsByClass("children")
+                               .first());
+       }
+
+       /**
+        * The comment ID: the "id" attribute of the first link of the comment.
+        *
+        * @param post
+        *            the comment element
+        *
+        * @return the ID, or an empty {@link String} if there is no link
+        */
+       @Override
+       protected String getCommentId(Element post) {
+               Element idE = post.getElementsByTag("a").first();
+               if (idE != null) {
+                       return idE.attr("id");
+               }
+
+               return "";
+       }
+
+       /**
+        * The comment author (always empty: the author name is returned by
+        * {@link SeptSurSept#getCommentTitle(Element)} instead).
+        *
+        * @param post
+        *            the comment element
+        *
+        * @return an empty {@link String}
+        */
+       @Override
+       protected String getCommentAuthor(Element post) {
+               // Since we have no title, we switch with author
+               return "";
+       }
+
+       /**
+        * The comment title: as the comments have no title, the author name is
+        * used (the text of the first "cite" of the first "footer").
+        *
+        * @param post
+        *            the comment element
+        *
+        * @return the author name, or an empty {@link String} if not found
+        */
+       @Override
+       protected String getCommentTitle(Element post) {
+               // Since we have no title, we switch with author
+               Element authorE = post.getElementsByTag("footer").first();
+               if (authorE != null) {
+                       authorE = authorE.getElementsByTag("cite").first();
+               }
+               if (authorE != null) {
+                       return authorE.text();
+               }
+
+               return "";
+       }
+
+       /**
+        * The comment date: the "data-epoch" attribute of the first "span" found
+        * in the first link of the comment.
+        *
+        * @param post
+        *            the comment element
+        *
+        * @return the date as found in the attribute, or an empty {@link String}
+        *         if not found
+        */
+       @Override
+       protected String getCommentDate(Element post) {
+               Element idE = post.getElementsByTag("a").first();
+               if (idE != null) {
+                       Element dateE = idE.getElementsByTag("span").first();
+                       if (dateE != null) {
+                               return dateE.attr("data-epoch");
+                       }
+               }
+
+               return "";
+       }
+
+       /**
+        * The element that contains the text of the comment: the first element
+        * of class "comment-content".
+        *
+        * @param post
+        *            the comment element
+        *
+        * @return the content element, or NULL if not found
+        */
+       @Override
+       protected Element getCommentContentElement(Element post) {
+               return post.getElementsByClass("comment-content").first();
+       }
+
+       /**
+        * The processor used on the comments: it ignores the "h4" elements.
+        *
+        * @return the processor
+        */
+       @Override
+       protected ElementProcessor getElementProcessorComment() {
+               return new BasicElementProcessor() {
+                       @Override
+                       public boolean ignoreNode(Node node) {
+                               return node instanceof Element
+                                               && "h4".equals(((Element) node).tagName());
+                       }
+               };
+       }
+
+       /**
+        * Keep the direct children of the given element that have the class
+        * "comment".
+        *
+        * @param posts
+        *            the container of the comments (can be NULL)
+        *
+        * @return the comment elements (empty if the container is NULL)
+        */
+       private List<Element> getSubCommentElements(Element posts) {
+               List<Element> commentElements = new ArrayList<Element>();
+               if (posts != null) {
+                       for (Element possibleCommentElement : posts.children()) {
+                               if (possibleCommentElement.hasClass("comment")) {
+                                       commentElements.add(possibleCommentElement);
+                               }
+                       }
+               }
+
+               return commentElements;
+       }
+}
index ff758cb379bab6c910036ea31811295ba9b61dbb..614022c9a35533f166ce4b6e5025c98d85dc681c 100644 (file)
@@ -22,4 +22,6 @@ public enum Type {
        ERE_NUMERIQUE,
        /** EN: IT */
        PHORONIX,
+       /** FR: Any */
+       SEPT_SUR_SEPT,
 }
index ce16e3e69f97b889c22dfa45b019e81d10ba5b0e..0151ae2861362b1630b1f14feb7a0f90b942a5a8 100644 (file)
@@ -13,6 +13,7 @@ public class Test extends TestLauncher {
                addTest(new TestLWN(args));
                addTest(new TestSlashdot(args));
                addTest(new TestTooLinux(args));
+               addTest(new TestSeptSurSept(args));
        }
 
        public static void main(String[] args) {
diff --git a/src/be/nikiroo/gofetch/test/TestSeptSurSept.java b/src/be/nikiroo/gofetch/test/TestSeptSurSept.java
new file mode 100644 (file)
index 0000000..7ecece4
--- /dev/null
@@ -0,0 +1,29 @@
+package be.nikiroo.gofetch.test;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+
+import be.nikiroo.gofetch.support.SeptSurSept;
+import be.nikiroo.gofetch.support.Type;
+
+/**
+ * Test of the {@link SeptSurSept} support.
+ * <p>
+ * The tested support is an anonymous subclass that reports a fixed
+ * {@link Type} and opens its URLs through {@link TestBase#download(URL)}.
+ */
+public class TestSeptSurSept extends TestBase {
+       /** The type reported by the tested support. */
+       private static Type type = Type.SEPT_SUR_SEPT;
+       /**
+        * The test used by the tested support to download its pages; it is only
+        * set at the end of the constructor.
+        */
+       private static TestBase base;
+
+       /**
+        * Create the test.
+        *
+        * @param args
+        *            the arguments passed on to the {@link TestBase} constructor
+        */
+       public TestSeptSurSept(String[] args) {
+               super(new SeptSurSept() {
+                       @Override
+                       public Type getType() {
+                               return type;
+                       }
+
+                       @Override
+                       protected InputStream open(URL url) throws IOException {
+                               return base.download(url);
+                       }
+               }, args);
+
+               // "this" cannot be used before super(...) returns, hence the static
+               base = this;
+       }
+}