import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.jsoup.helper.DataUtil;
*/
public abstract class BasicSupport {
/**
- * The downloader to use for all websites via {@link BasicSupport#open(URL)}
+ * The downloader to use for all web sites via
+ * {@link BasicSupport#open(URL)}
*/
static private Downloader downloader = new Downloader("gofetcher");
static private String preselector;
+ /**
+ * The optional cookies to use to get the site data.
+ */
+ private Map<String, String> cookies = new HashMap<String, String>();
+
private Type type;
+ /**
+ * Login on the web site (this method does nothing by default, but can be
+ * overridden if needed).
+ *
+ * @throws IOException
+ * in case of I/O error
+ *
+ */
+ public void login() throws IOException {
+ }
+
/**
* The website textual description, to add in the dispatcher page.
* <p>
public List<Story> list() throws IOException {
List<Story> list = new ArrayList<Story>();
+ login();
for (Entry<URL, String> entry : getUrls()) {
URL url = entry.getKey();
String defaultCateg = entry.getValue();
Document doc = DataUtil.load(in, "UTF-8", url.toString());
Element article = getFullArticle(doc);
if (article != null) {
- StringBuilder builder = new StringBuilder();
- ElementProcessor eProc = getElementProcessorFullArticle();
- if (eProc != null) {
- for (String line : toLines(article, eProc)) {
- builder.append(line + "\n");
- }
- } else {
- builder.append(article.text());
- }
-
- // Content is too tight with a single break per line:
- fullContent = builder.toString().replace("\n", "\n\n") //
- .replace("\n\n\n\n", "\n\n") //
- .replace("\n\n\n\n", "\n\n") //
- .trim();
+ fullContent = getArticleText(article);
}
if (fullContent.isEmpty()) {
}
/**
- * Return the full article if available.
+ * Return the text from this {@link Element}, using the
+ * {@link BasicSupport#getElementProcessorFullArticle()} processor logic.
+ *
+ * @param article
+ * the element to extract the text from
+ *
+ * @return the text
+ */
+ protected String getArticleText(Element article) {
+ StringBuilder builder = new StringBuilder();
+ ElementProcessor eProc = getElementProcessorFullArticle();
+ if (eProc != null) {
+ for (String line : toLines(article, eProc)) {
+ builder.append(line + "\n");
+ }
+ } else {
+ builder.append(article.text());
+ }
+
+ // Content is too tight with a single break per line:
+ return builder.toString().replace("\n", "\n\n") //
+ .replace("\n\n\n\n", "\n\n") //
+ .replace("\n\n\n\n", "\n\n") //
+ .trim();
+ }
+
+ /**
+ * Return the full article if available (this is the article to retrieve
+ * from the newly downloaded page at {@link Story#getUrlInternal()}).
*
* @param doc
* the (full article) document to work on
* in case of I/O error
*/
protected InputStream open(URL url) throws IOException {
- return downloader.open(url);
+ return downloader.open(url, url, cookies, null, null, null);
}
/**
this.type = type;
}
+ /**
+ * Add a cookie for all site connections.
+ *
+ * @param name
+ * the cookie name
+ * @param value
+ * the value
+ */
+ protected void addCookie(String name, String value) {
+ cookies.put(name, value);
+ }
+
/**
* The {@link String} to append to the selector (the selector will be
* constructed as "this string" then "/type/".
case PHORONIX:
support = new Phoronix();
break;
+ case SEPT_SUR_SEPT:
+ support = new SeptSurSept();
+ break;
}
if (support != null) {