import java.util.ArrayList;
import java.util.Arrays;
import java.util.Date;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import java.util.Map.Entry;
import org.jsoup.helper.DataUtil;
* @author niki
*/
public abstract class BasicSupport {
- /** The downloader to use for all websites. */
- static protected Downloader downloader = new Downloader("gofetcher");
+ /**
+ * The downloader to use for all web sites via
+ * {@link BasicSupport#open(URL)}
+ */
+ static private Downloader downloader = new Downloader("gofetcher");
static private String preselector;
+ /**
+ * The optional cookies to use to get the site data (cookie name to
+ * cookie value); empty by default.
+ */
+ private Map<String, String> cookies = new HashMap<String, String>();
+
private Type type;
+ /**
+ * Log in to the web site (this method does nothing by default, but can be
+ * overridden if needed).
+ * <p>
+ * It is called by {@link BasicSupport#list()} before the site URLs are
+ * opened.
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ public void login() throws IOException {
+ }
+
/**
* The website textual description, to add in the dispatcher page.
* <p>
* @return the selector
*/
public String getSelector() {
- return getSelector(type);
+ return getSelector(getType()); // the type is read through its accessor, not the field
}
/**
public List<Story> list() throws IOException {
List<Story> list = new ArrayList<Story>();
+ login();
for (Entry<URL, String> entry : getUrls()) {
URL url = entry.getKey();
String defaultCateg = entry.getValue();
defaultCateg = "";
}
- InputStream in = downloader.open(url);
+ InputStream in = open(url);
Document doc = DataUtil.load(in, "UTF-8", url.toString());
List<Element> articles = getArticles(doc);
for (Element article : articles) {
id = "0" + id;
}
} else {
- id = date.replace(":", "_").replace("+", "_");
+ id = date.replace(":", "_").replace("+", "_").replace("/", "-");
}
-
+
date = date(date);
list.add(new Story(getType(), id, title, author, date, categ,
String fullContent = "";
URL url = new URL(story.getUrlInternal());
- InputStream in = downloader.open(url);
+ InputStream in = open(url);
try {
Document doc = DataUtil.load(in, "UTF-8", url.toString());
Element article = getFullArticle(doc);
if (article != null) {
- StringBuilder builder = new StringBuilder();
- ElementProcessor eProc = getElementProcessorFullArticle();
- if (eProc != null) {
- for (String line : toLines(article, eProc)) {
- builder.append(line + "\n");
- }
- } else {
- builder.append(article.text());
- }
-
- // Content is too tight with a single break per line:
- fullContent = builder.toString().replace("\n", "\n\n") //
- .replace("\n\n\n\n", "\n\n") //
- .replace("\n\n\n\n", "\n\n") //
- .trim();
+ fullContent = getArticleText(article);
}
if (fullContent.isEmpty()) {
}
/**
- * Return the full article if available.
+ * Return the text from this {@link Element}, using the
+ * {@link BasicSupport#getElementProcessorFullArticle()} processor logic.
+ *
+ * @param article
+ * the element to extract the text from
+ *
+ * @return the text
+ */
+ protected String getArticleText(Element article) {
+     StringBuilder builder = new StringBuilder();
+     ElementProcessor eProc = getElementProcessorFullArticle();
+     if (eProc != null) {
+         // One output line per line produced by the element processor.
+         for (String line : toLines(article, eProc)) {
+             builder.append(line).append('\n');
+         }
+     } else {
+         // No processor available: fall back to the raw element text.
+         builder.append(article.text());
+     }
+
+     // Content is too tight with a single break per line: double every
+     // break, then collapse each run of 3 or more breaks back to a single
+     // blank line. A regex is used so that runs of any length collapse;
+     // two fixed passes of "\n\n\n\n" -> "\n\n" still left extra blank
+     // lines once the text had 5 or more consecutive breaks.
+     return builder.toString().replace("\n", "\n\n") //
+             .replaceAll("\n{3,}", "\n\n") //
+             .trim();
+ }
+
+ /**
+ * Return the full article if available (this is the article to retrieve
+ * from the newly downloaded page at {@link Story#getUrlInternal()}).
*
* @param doc
* the (full article) document to work on
*/
abstract protected ElementProcessor getElementProcessorFullArticle();
+ /**
+ * Open a network resource through the shared downloader, passing the
+ * cookies of this instance to it.
+ * <p>
+ * You need to close the returned {@link InputStream} when done.
+ *
+ * @param url
+ * the source to open (it is also given as the second argument of
+ * the downloader call)
+ *
+ * @return the content
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ protected InputStream open(URL url) throws IOException {
+ return downloader.open(url, url, cookies, null, null, null);
+ }
+
/**
* Convert the comment elements into {@link Comment}s
*
this.type = type;
}
+ /**
+ * Add a cookie for all site connections made by this instance.
+ * <p>
+ * The cookie is stored in the instance cookie map, which
+ * {@link BasicSupport#open(URL)} hands to the downloader; adding a cookie
+ * under an already used name replaces its previous value.
+ *
+ * @param name
+ * the cookie name
+ * @param value
+ * the value
+ */
+ protected void addCookie(String name, String value) {
+ cookies.put(name, value);
+ }
+
/**
* The {@link String} to append to the selector (the selector will be
* constructed as "this string" then "/type/").
case PHORONIX:
support = new Phoronix();
break;
+ case SEPT_SUR_SEPT:
+ support = new SeptSurSept();
+ break;
+ case REDDIT:
+ support = new Reddit();
+ break;
}
if (support != null) {
Date dat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssXXX")
.parse(date.trim());
return out.format(dat);
- } catch (ParseException e) {
+ } catch (Exception e) {
return date;
}
}