X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Fgofetch%2Fsupport%2FBasicSupport.java;h=9555b9db363a2bdffe408d1698db0bb35f73d380;hb=21f1a997d7addd93226243b0d6d80ce8afcde153;hp=a59ae313fb1f1fda8979020d7e6315d81ba6592e;hpb=3e62b034c1981ae6329f06b3f8c0ee25c3683789;p=gofetch.git diff --git a/src/be/nikiroo/gofetch/support/BasicSupport.java b/src/be/nikiroo/gofetch/support/BasicSupport.java index a59ae31..9555b9d 100644 --- a/src/be/nikiroo/gofetch/support/BasicSupport.java +++ b/src/be/nikiroo/gofetch/support/BasicSupport.java @@ -3,12 +3,13 @@ package be.nikiroo.gofetch.support; import java.io.IOException; import java.io.InputStream; import java.net.URL; -import java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Date; +import java.util.HashMap; import java.util.List; +import java.util.Map; import java.util.Map.Entry; import org.jsoup.helper.DataUtil; @@ -31,13 +32,33 @@ import be.nikiroo.utils.StringUtils; * @author niki */ public abstract class BasicSupport { - /** The downloader to use for all websites. */ - protected static Downloader downloader = new Downloader("gofetcher"); + /** + * The {@link Downloader} to use for all web sites via + * {@link BasicSupport#open(URL)} + */ + static private Downloader downloader = new Downloader("gofetcher"); static private String preselector; + /** + * The optional cookies to use to get the site data. + */ + private Map cookies = new HashMap(); + private Type type; + /** + * Login on the web site (this method does nothing by default, but can be + * overridden if needed). + * + * @throws IOException + * in case of I/O error + * + */ + @SuppressWarnings("unused") + public void login() throws IOException { + } + /** * The website textual description, to add in the dispatcher page. *

@@ -55,7 +76,7 @@ public abstract class BasicSupport { * @return the selector */ public String getSelector() { - return getSelector(type); + return getSelector(getType()); } /** @@ -80,6 +101,7 @@ public abstract class BasicSupport { public List list() throws IOException { List list = new ArrayList(); + login(); for (Entry entry : getUrls()) { URL url = entry.getKey(); String defaultCateg = entry.getValue(); @@ -87,7 +109,7 @@ public abstract class BasicSupport { defaultCateg = ""; } - InputStream in = downloader.open(url); + InputStream in = open(url); Document doc = DataUtil.load(in, "UTF-8", url.toString()); List articles = getArticles(doc); for (Element article : articles) { @@ -106,8 +128,13 @@ public abstract class BasicSupport { continue; } - if (id.isEmpty()) { - id = date.replace(":", "_").replace("+", "_"); + if (!id.isEmpty()) { + while (id.length() < 10) { + id = "0" + id; + } + } else { + id = date.replace(":", "_").replace("+", "_") + .replace("/", "-"); } date = date(date); @@ -269,26 +296,12 @@ public abstract class BasicSupport { String fullContent = ""; URL url = new URL(story.getUrlInternal()); - InputStream in = downloader.open(url); + InputStream in = open(url); try { Document doc = DataUtil.load(in, "UTF-8", url.toString()); Element article = getFullArticle(doc); if (article != null) { - StringBuilder builder = new StringBuilder(); - ElementProcessor eProc = getElementProcessorFullArticle(); - if (eProc != null) { - for (String line : toLines(article, eProc)) { - builder.append(line + "\n"); - } - } else { - builder.append(article.text()); - } - - // Content is too tight with a single break per line: - fullContent = builder.toString().replace("\n", "\n\n") // - .replace("\n\n\n\n", "\n\n") // - .replace("\n\n\n\n", "\n\n") // - .trim(); + fullContent = getArticleText(article); } if (fullContent.isEmpty()) { @@ -306,7 +319,35 @@ public abstract class BasicSupport { } /** - * Return the full article if available. + * Return the text from this {@link Element}, using the + * {@link BasicSupport#getElementProcessorFullArticle()} processor logic. + * + * @param article + * the element to extract the text from + * + * @return the text + */ + protected String getArticleText(Element article) { + StringBuilder builder = new StringBuilder(); + ElementProcessor eProc = getElementProcessorFullArticle(); + if (eProc != null) { + for (String line : toLines(article, eProc)) { + builder.append(line + "\n"); + } + } else { + builder.append(article.text()); + } + + // Content is too tight with a single break per line: + return builder.toString().replace("\n", "\n\n") // + .replace("\n\n\n\n", "\n\n") // + .replace("\n\n\n\n", "\n\n") // + .trim(); + } + + /** + * Return the full article if available (this is the article to retrieve + * from the newly downloaded page at {@link Story#getUrlInternal()}). * * @param doc * the (full article) document to work on @@ -342,6 +383,23 @@ public abstract class BasicSupport { */ abstract protected ElementProcessor getElementProcessorFullArticle(); + /** + * Open a network resource. + *

+ * You need to close the returned {@link InputStream} when done. + * + * @param url + * the source to open + * + * @return the content + * + * @throws IOException + * in case of I/O error + */ + protected InputStream open(URL url) throws IOException { + return downloader.open(url, url, cookies, null, null, null); + } + /** * Convert the comment elements into {@link Comment}s * @@ -480,6 +538,18 @@ public abstract class BasicSupport { this.type = type; } + /** + * Add a cookie for all site connections. + * + * @param name + * the cookie name + * @param value + * the value + */ + protected void addCookie(String name, String value) { + cookies.put(name, value); + } + /** * The {@link String} to append to the selector (the selector will be * constructed as "this string" then "/type/". @@ -508,9 +578,6 @@ public abstract class BasicSupport { case SLASHDOT: support = new Slashdot(); break; - case PIPEDOT: - support = new Pipedot(); - break; case LWN: support = new LWN(); break; @@ -526,6 +593,15 @@ public abstract class BasicSupport { case ERE_NUMERIQUE: support = new EreNumerique(); break; + case PHORONIX: + support = new Phoronix(); + break; + case SEPT_SUR_SEPT: + support = new SeptSurSept(); + break; + case REDDIT: + support = new Reddit(); + break; } if (support != null) { @@ -744,7 +820,7 @@ public abstract class BasicSupport { Date dat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssXXX") .parse(date.trim()); return out.format(dat); - } catch (ParseException e) { + } catch (Exception e) { return date; } }