From 9cf08a99ce4a796a2294fa1f14315aa16d97c3ce Mon Sep 17 00:00:00 2001 From: Niki Roo Date: Wed, 26 Sep 2018 20:02:19 +0200 Subject: [PATCH] Add support for 7sur7.be --- src/be/nikiroo/gofetch/Main.java | 2 +- .../nikiroo/gofetch/support/BasicSupport.java | 42 +++- .../nikiroo/gofetch/support/SeptSurSept.java | 232 ++++++++++++++++++ src/be/nikiroo/gofetch/support/Type.java | 2 + src/be/nikiroo/gofetch/test/Test.java | 1 + .../nikiroo/gofetch/test/TestSeptSurSept.java | 29 +++ 6 files changed, 304 insertions(+), 4 deletions(-) create mode 100644 src/be/nikiroo/gofetch/support/SeptSurSept.java create mode 100644 src/be/nikiroo/gofetch/test/TestSeptSurSept.java diff --git a/src/be/nikiroo/gofetch/Main.java b/src/be/nikiroo/gofetch/Main.java index e4078d8..1672c88 100644 --- a/src/be/nikiroo/gofetch/Main.java +++ b/src/be/nikiroo/gofetch/Main.java @@ -105,4 +105,4 @@ public class Main { new Fetcher(dir, preselector, type, maxStories, hostname, port).start(); } -} \ No newline at end of file +} diff --git a/src/be/nikiroo/gofetch/support/BasicSupport.java b/src/be/nikiroo/gofetch/support/BasicSupport.java index a748262..dcd5e6e 100644 --- a/src/be/nikiroo/gofetch/support/BasicSupport.java +++ b/src/be/nikiroo/gofetch/support/BasicSupport.java @@ -8,7 +8,9 @@ import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Date; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Map.Entry; import org.jsoup.helper.DataUtil; @@ -32,14 +34,31 @@ import be.nikiroo.utils.StringUtils; */ public abstract class BasicSupport { /** - * The downloader to use for all websites via {@link BasicSupport#open(URL)} + * The downloader to use for all web sites via + * {@link BasicSupport#open(URL)} */ static private Downloader downloader = new Downloader("gofetcher"); static private String preselector; + /** + * The optional cookies to use to get the site data. + */ + private Map cookies = new HashMap(); + private Type type; + /** + * Login on the web site (this method does nothing by default, but can be + * overridden if needed). + * + * @throws IOException + * in case of I/O error + * + */ + public void login() throws IOException { + } + /** * The website textual description, to add in the dispatcher page. *

@@ -82,6 +101,7 @@ public abstract class BasicSupport { public List list() throws IOException { List list = new ArrayList(); + login(); for (Entry entry : getUrls()) { URL url = entry.getKey(); String defaultCateg = entry.getValue(); @@ -312,7 +332,8 @@ public abstract class BasicSupport { } /** - * Return the full article if available. + * Return the full article if available (this is the article to retrieve + * from the newly downloaded page at {@link Story#getUrlInternal()}). * * @param doc * the (full article) document to work on @@ -362,7 +383,7 @@ public abstract class BasicSupport { * in case of I/O error */ protected InputStream open(URL url) throws IOException { - return downloader.open(url); + return downloader.open(url, url, cookies, null, null, null); } /** @@ -503,6 +524,18 @@ public abstract class BasicSupport { this.type = type; } + /** + * Add a cookie for all site connections. + * + * @param name + * the cookie name + * @param value + * the value + */ + protected void addCookie(String name, String value) { + cookies.put(name, value); + } + /** * The {@link String} to append to the selector (the selector will be * constructed as "this string" then "/type/". @@ -552,6 +585,9 @@ public abstract class BasicSupport { case PHORONIX: support = new Phoronix(); break; + case SEPT_SUR_SEPT: + support = new SeptSurSept(); + break; } if (support != null) { diff --git a/src/be/nikiroo/gofetch/support/SeptSurSept.java b/src/be/nikiroo/gofetch/support/SeptSurSept.java new file mode 100644 index 0000000..6134f57 --- /dev/null +++ b/src/be/nikiroo/gofetch/support/SeptSurSept.java @@ -0,0 +1,232 @@ +package be.nikiroo.gofetch.support; + +import java.io.IOException; +import java.net.URL; +import java.util.AbstractMap; +import java.util.ArrayList; +import java.util.List; +import java.util.Map.Entry; + +import org.jsoup.nodes.Document; +import org.jsoup.nodes.Element; +import org.jsoup.nodes.Node; + +/** + * Support https://www.7sur7.be/. + * + * @author niki + */ +public class SeptSurSept extends BasicSupport { + @Override + public String getDescription() { + return "7SUR7.be Info, sport et showbiz, 24/24, 7/7"; + } + + @Override + public void login() throws IOException { + addCookie("pwrefr2", ""); + addCookie("pwv-atXMVFeyFP1Ki09i", "1"); + addCookie("pwg-atXMVFeyFP1Ki09i", "basic"); + + addCookie("pwv", "1"); + addCookie("pw", "functional"); + + URL url = new URL("https://www.7sur7.be/7s7/privacy/callback.do" + + "?redirectUri=/" + "&pwv=1" + "&pws=functional%7Canalytics" + + "&days=3650" + "&referrer="); + + open(url).close(); + } + + @Override + protected List> getUrls() throws IOException { + List> urls = new ArrayList>(); + URL url = new URL("https://www.7sur7.be/"); + urls.add(new AbstractMap.SimpleEntry(url, "")); + + return urls; + } + + @Override + protected List getArticles(Document doc) { + return doc.getElementsByClass("clip"); + } + + @Override + protected String getArticleId(Document doc, Element article) { + String id = article.attr("id"); + if (id != null && id.startsWith("clip")) { + return id.substring("clip".length()); + } + + return null; + } + + @Override + protected String getArticleTitle(Document doc, Element article) { + return article.attr("data-title"); + } + + @Override + protected String getArticleAuthor(Document doc, Element article) { + return ""; + } + + @Override + protected String getArticleDate(Document doc, Element article) { + return article.attr("data-date"); + } + + @Override + protected String getArticleCategory(Document doc, Element article, + String currentCategory) { + Element parent = article.parent(); + if (parent != null) { + Element catElement = parent.previousElementSibling(); + if (catElement != null) { + return catElement.text(); + } + } + + return ""; + } + + @Override + protected String getArticleDetails(Document doc, Element article) { + return ""; + } + + @Override + protected String getArticleIntUrl(Document doc, Element article) { + return article.absUrl("href"); + } + + @Override + protected String getArticleExtUrl(Document doc, Element article) { + return ""; + } + + @Override + protected String getArticleContent(Document doc, Element article) { + return article.attr("data-intro").trim(); + } + + @Override + protected Element getFullArticle(Document doc) { + return doc.getElementById("detail_content"); + } + + @Override + protected List getFullArticleCommentPosts(Document doc, URL intUrl) { + return getSubCommentElements(doc.getElementsByClass("comment-list") + .first()); + } + + @Override + protected ElementProcessor getElementProcessorFullArticle() { + return new BasicElementProcessor() { + @Override + public boolean ignoreNode(Node node) { + return node.attr("class").contains("chapo"); + } + + @Override + public String isSubtitle(Node node) { + if (node instanceof Element) { + Element element = (Element) node; + if (element.tagName().startsWith("h") + && element.tagName().length() == 2) { + return element.text(); + } + } + return null; + } + }; + } + + @Override + protected List getCommentCommentPosts(Document doc, + Element container) { + return getSubCommentElements(container.getElementsByClass("children") + .first()); + } + + @Override + protected String getCommentId(Element post) { + Element idE = post.getElementsByTag("a").first(); + if (idE != null) { + return idE.attr("id"); + } + + return ""; + } + + @Override + protected String getCommentAuthor(Element post) { + // Since we have no title, we switch with author + return ""; + } + + @Override + protected String getCommentTitle(Element post) { + // Since we have no title, we switch with author + Element authorE = post.getElementsByTag("footer").first(); + if (authorE != null) { + authorE = authorE.getElementsByTag("cite").first(); + } + if (authorE != null) { + return authorE.text(); + } + + return ""; + } + + @Override + protected String getCommentDate(Element post) { + Element idE = post.getElementsByTag("a").first(); + if (idE != null) { + Element dateE = idE.getElementsByTag("span").first(); + if (dateE != null) { + return dateE.attr("data-epoch"); + } + } + + return ""; + } + + @Override + protected Element getCommentContentElement(Element post) { + Element contentE = post.getElementsByClass("comment-content").first(); + return contentE; + } + + @Override + protected ElementProcessor getElementProcessorComment() { + return new BasicElementProcessor() { + @Override + public boolean ignoreNode(Node node) { + if (node instanceof Element) { + Element el = (Element) node; + if ("h4".equals(el.tagName())) { + return true; + } + } + + return false; + } + }; + } + + private List getSubCommentElements(Element posts) { + List commentElements = new ArrayList(); + if (posts != null) { + for (Element possibleCommentElement : posts.children()) { + if (possibleCommentElement.hasClass("comment")) { + commentElements.add(possibleCommentElement); + } + } + } + + return commentElements; + } +} diff --git a/src/be/nikiroo/gofetch/support/Type.java b/src/be/nikiroo/gofetch/support/Type.java index ff758cb..614022c 100644 --- a/src/be/nikiroo/gofetch/support/Type.java +++ b/src/be/nikiroo/gofetch/support/Type.java @@ -22,4 +22,6 @@ public enum Type { ERE_NUMERIQUE, /** EN: IT */ PHORONIX, + /** FR: Any */ + SEPT_SUR_SEPT, } diff --git a/src/be/nikiroo/gofetch/test/Test.java b/src/be/nikiroo/gofetch/test/Test.java index ce16e3e..0151ae2 100644 --- a/src/be/nikiroo/gofetch/test/Test.java +++ b/src/be/nikiroo/gofetch/test/Test.java @@ -13,6 +13,7 @@ public class Test extends TestLauncher { addTest(new TestLWN(args)); addTest(new TestSlashdot(args)); addTest(new TestTooLinux(args)); + addTest(new TestSeptSurSept(args)); } public static void main(String[] args) { diff --git a/src/be/nikiroo/gofetch/test/TestSeptSurSept.java b/src/be/nikiroo/gofetch/test/TestSeptSurSept.java new file mode 100644 index 0000000..7ecece4 --- /dev/null +++ b/src/be/nikiroo/gofetch/test/TestSeptSurSept.java @@ -0,0 +1,29 @@ +package be.nikiroo.gofetch.test; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; + +import be.nikiroo.gofetch.support.SeptSurSept; +import be.nikiroo.gofetch.support.Type; + +public class TestSeptSurSept extends TestBase { + static private Type type = Type.SEPT_SUR_SEPT; + static private TestBase base = null; + + public TestSeptSurSept(String[] args) { + super(new SeptSurSept() { + @Override + protected InputStream open(URL url) throws IOException { + return base.download(url); + } + + @Override + public Type getType() { + return type; + } + }, args); + + base = this; + } +} -- 2.27.0