import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
-import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
*/
public abstract class BasicSupport {
/**
- * The downloader to use for all web sites via
+ * The {@link Downloader} to use for all web sites via
* {@link BasicSupport#open(URL)}
*/
static private Downloader downloader = new Downloader("gofetcher");
* in case of I/O error
*
*/
+ @SuppressWarnings("unused")
public void login() throws IOException {
    // Intentionally empty: this implementation performs no action.
    // NOTE(review): the declared IOException is never thrown here, which is
    // presumably why the "unused" warning is suppressed -- confirm.
}
id = "0" + id;
}
} else {
- id = date.replace(":", "_").replace("+", "_");
+ id = date.replace(":", "_").replace("+", "_")
+ .replace("/", "-");
}
date = date(date);
Document doc = DataUtil.load(in, "UTF-8", url.toString());
Element article = getFullArticle(doc);
if (article != null) {
- StringBuilder builder = new StringBuilder();
- ElementProcessor eProc = getElementProcessorFullArticle();
- if (eProc != null) {
- for (String line : toLines(article, eProc)) {
- builder.append(line + "\n");
- }
- } else {
- builder.append(article.text());
- }
-
- // Content is too tight with a single break per line:
- fullContent = builder.toString().replace("\n", "\n\n") //
- .replace("\n\n\n\n", "\n\n") //
- .replace("\n\n\n\n", "\n\n") //
- .trim();
+ fullContent = getArticleText(article);
}
if (fullContent.isEmpty()) {
}
}
+ /**
+ * Return the text from this {@link Element}, using the
+ * {@link BasicSupport#getElementProcessorFullArticle()} processor logic.
+ * <p>
+ * If that processor is not NULL, the text is built from the lines returned
+ * by {@code toLines(article, processor)}, each of them followed by a line
+ * break; otherwise, the result of {@code article.text()} is used as is.
+ * <p>
+ * Every line break in the resulting text is then doubled, sequences of 4
+ * line breaks are replaced by 2 (this replacement pass is applied twice)
+ * and the final text is trimmed.
+ *
+ * @param article
+ * the element to extract the text from
+ *
+ * @return the text, with doubled line breaks and trimmed
+ */
+ protected String getArticleText(Element article) {
+ StringBuilder builder = new StringBuilder();
+ ElementProcessor eProc = getElementProcessorFullArticle();
+ if (eProc != null) {
+ for (String line : toLines(article, eProc)) {
+ builder.append(line + "\n");
+ }
+ } else {
+ builder.append(article.text());
+ }
+
+ // Content is too tight with a single break per line:
+ return builder.toString().replace("\n", "\n\n") //
+ .replace("\n\n\n\n", "\n\n") //
+ .replace("\n\n\n\n", "\n\n") //
+ .trim();
+ }
+
/**
* Return the full article if available (this is the article to retrieve
* from the newly downloaded page at {@link Story#getUrlInternal()}).
case SLASHDOT:
support = new Slashdot();
break;
- case PIPEDOT:
- support = new Pipedot();
- break;
case LWN:
support = new LWN();
break;
case SEPT_SUR_SEPT:
support = new SeptSurSept();
break;
+ case REDDIT:
+ support = new Reddit();
+ break;
}
if (support != null) {
Date dat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssXXX")
.parse(date.trim());
return out.format(dat);
- } catch (ParseException e) {
+ } catch (Exception e) {
return date;
}
}