+ /**
+ * List all the recent items, but only assure the ID and internal URL to
+ * fetch it later on (until it has been fetched, the rest of the
+ * {@link Story} is not confirmed).
+ *
+ * @return the list of new stories
+ *
+ * @throws IOException
+ * in case of I/O
+ */
+ public List<Story> list() throws IOException {
+ List<Story> list = new ArrayList<Story>();
+
+ for (Entry<URL, String> entry : getUrls()) {
+ URL url = entry.getKey();
+ String defaultCateg = entry.getValue();
+ if (defaultCateg == null) {
+ defaultCateg = "";
+ }
+
+ InputStream in = downloader.open(url);
+ Document doc = DataUtil.load(in, "UTF-8", url.toString());
+ List<Element> articles = getArticles(doc);
+ for (Element article : articles) {
+ String id = getArticleId(doc, article).trim();
+ String title = getArticleTitle(doc, article).trim();
+ String author = getArticleAuthor(doc, article).trim();
+ String date = getArticleDate(doc, article).trim();
+ String categ = getArticleCategory(doc, article, defaultCateg)
+ .trim();
+ String details = getArticleDetails(doc, article).trim();
+ String intUrl = getArticleIntUrl(doc, article).trim();
+ String extUrl = getArticleExtUrl(doc, article).trim();
+ String content = getArticleContent(doc, article).trim();
+
+ if (id.isEmpty() && date.isEmpty()) {
+ continue;
+ }
+
+ if (id.isEmpty()) {
+ id = date.replace(":", "_").replace("+", "_");
+ }
+
+ date = date(date);
+
+ list.add(new Story(getType(), id, title, author, date, categ,
+ details, intUrl, extUrl, content));
+ }
+ }
+
+ return list;
+ }
+
+ /**
+ * The {@link URL}s to process for this website.
+ *
+ * @return the list of {@link URL}s
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ abstract protected List<Entry<URL, String>> getUrls() throws IOException;
+
+ /**
+ * The article {@link Element}s of this document.
+ *
+ * @param doc
+ * the main document for the current category
+ *
+ * @return the articles
+ */
+ abstract protected List<Element> getArticles(Document doc);
+
+ /**
+ * The ID of the article (defaults to the date element if empty).
+ *
+ * @param doc
+ * the main document for the current category
+ * @param article
+ * the article to look into
+ *
+ * @return the ID
+ */
+ abstract protected String getArticleId(Document doc, Element article);
+
+ /**
+ * The article title to display.
+ *
+ * @param doc
+ * the main document for the current category
+ * @param article
+ * the article to look into
+ *
+ * @return the title
+ */
+ abstract protected String getArticleTitle(Document doc, Element article);
+
+ /**
+ * The optional article author.
+ *
+ * @param doc
+ * the main document for the current category
+ * @param article
+ * the article to look into
+ *
+ * @return the author
+ */
+ abstract protected String getArticleAuthor(Document doc, Element article);
+
+ /**
+ * The optional article date.
+ *
+ * @param doc
+ * the main document for the current category
+ * @param article
+ * the article to look into
+ *
+ * @return the date
+ */
+ abstract protected String getArticleDate(Document doc, Element article);
+
+ /**
+ * the optional article category.
+ *
+ * @param doc
+ * the main document for the current category
+ * @param article
+ * the article to look into
+ * @param currentCategory
+ * the currently listed category if any (can be NULL)
+ *
+ * @return the category
+ */
+ abstract protected String getArticleCategory(Document doc, Element article,
+ String currentCategory);
+
+ /**
+ * the optional details of the article (can replace the date, author and
+ * category, for instance).
+ *
+ * @param doc
+ * the main document for the current category
+ * @param article
+ * the article to look into
+ *
+ * @return the details
+ */
+ abstract protected String getArticleDetails(Document doc, Element article);
+
+ /**
+ * The (required) {@link URL} that points to the news page on the supported
+ * website.
+ *
+ * @param doc
+ * the main document for the current category
+ * @param article
+ * the article to look into
+ *
+ * @return the internal {@link URL}
+ */
+ abstract protected String getArticleIntUrl(Document doc, Element article);
+
+ /**
+ * the optional {@link URL} that points to an external website for more
+ * information.
+ *
+ * @param doc
+ * the main document for the current category
+ * @param article
+ * the article to look into
+ *
+ * @return the external {@link URL}
+ */
+ abstract protected String getArticleExtUrl(Document doc, Element article);
+
+ /**
+ * The optional article short-content (not the full content, that will be
+ * fetched by {@link BasicSupport#fetch(Story)}).
+ *
+ * @param doc
+ * the main document for the current category
+ * @param article
+ * the article to look into
+ *
+ * @return the short content
+ */
+ abstract protected String getArticleContent(Document doc, Element article);
+
+ /**
+ * Fetch the full article content as well as all the comments associated to
+ * this {@link Story}, if any (can be empty, but not NULL).
+ *
+ * @param story
+ * the story to fetch the comments of
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
+ public void fetch(Story story) throws IOException {
+ String fullContent = "";
+
+ URL url = new URL(story.getUrlInternal());
+ InputStream in = downloader.open(url);
+ try {
+ Document doc = DataUtil.load(in, "UTF-8", url.toString());
+ Element article = getFullArticle(doc);
+ if (article != null) {
+ StringBuilder builder = new StringBuilder();
+ ElementProcessor eProc = getElementProcessorFullArticle();
+ if (eProc != null) {
+ for (String line : toLines(article, eProc)) {
+ builder.append(line + "\n");
+ }
+ } else {
+ builder.append(article.text());
+ }
+
+ // Content is too tight with a single break per line:
+ fullContent = builder.toString().replace("\n", "\n\n") //
+ .replace("\n\n\n\n", "\n\n") //
+ .replace("\n\n\n\n", "\n\n") //
+ .trim();
+ }
+
+ if (fullContent.isEmpty()) {
+ fullContent = story.getContent();
+ }
+
+ story.setFullContent(fullContent);
+ story.setComments(getComments(doc,
+ getFullArticleCommentPosts(doc, url)));
+ } finally {
+ if (in != null) {
+ in.close();
+ }
+ }
+ }
+
+ /**
+ * Return the full article if available.
+ *
+ * @param doc
+ * the (full article) document to work on
+ *
+ * @return the article or NULL
+ */
+ abstract protected Element getFullArticle(Document doc);
+
+ /**
+ * Return the list of comment {@link Element}s from this optional container
+ * -- must <b>NOT</b> return the "container" as a comment {@link Element}.
+ *
+ * @param doc
+ * the (full article) document to work on
+ * @param intUrl
+ * the internal {@link URL} this article wa taken from (the
+ * {@link URL} from the supported website)
+ *
+ * @return the list of comment posts
+ */
+ abstract protected List<Element> getFullArticleCommentPosts(Document doc,
+ URL intUrl);
+
+ /**
+ * The {@link ElementProcessor} to use to convert the main article element
+ * (see {@link BasicSupport#getFullArticle(Document)}) into text.
+ * <p>
+ * See {@link BasicElementProcessor} for a working, basic implementation.
+ * <p>
+ * Can be NULL to simply use {@link Element#text()}.
+ *
+ * @return the processor, or NULL
+ */
+ abstract protected ElementProcessor getElementProcessorFullArticle();
+
+ /**
+ * Convert the comment elements into {@link Comment}s
+ *
+ * @param doc
+ * the document we work on
+ * @param posts
+ * the comment elements
+ *
+ * @return the converted {@link Comment}s
+ */
+ private List<Comment> getComments(Document doc, List<Element> posts) {
+ List<Comment> comments = new ArrayList<Comment>();
+ if (posts != null) {
+ for (Element post : posts) {
+ String id = getCommentId(post).trim();
+ String author = getCommentAuthor(post).trim();
+ String title = getCommentTitle(post).trim();
+ String date = getCommentDate(post).trim();
+
+ List<String> content = new ArrayList<String>();
+
+ if (id.isEmpty()) {
+ id = date;
+ }
+
+ date = date(date);
+
+ Element contentE = getCommentContentElement(post);
+ if (contentE != null) {
+ ElementProcessor eProc = getElementProcessorComment();
+ if (eProc != null) {
+ for (String line : toLines(contentE, eProc)) {
+ content.add(line);
+ }
+ } else {
+ content = Arrays.asList(contentE.text().split("\n"));
+ }
+ }
+
+ Comment comment = new Comment(id, author, title, date, content);
+ comment.addAll(getComments(doc,
+ getCommentCommentPosts(doc, post)));
+
+ if (!comment.isEmpty()) {
+ comments.add(comment);
+ }
+ }
+ }
+
+ return comments;
+ }
+
+ /**
+ * Return the list of subcomment {@link Element}s from this comment element
+ * -- must <b>NOT</b> return the "container" as a comment {@link Element}.
+ *
+ * @param doc
+ * the (full article) document to work on
+ * @param container
+ * the container (a comment {@link Element})
+ *
+ * @return the list of comment posts
+ */
+ abstract protected List<Element> getCommentCommentPosts(Document doc,
+ Element container);
+
+ /**
+ * Compute the ID of the given comment element.
+ *
+ * @param post
+ * the comment element
+ *
+ * @return the ID
+ */
+ abstract protected String getCommentId(Element post);
+
+ /**
+ * Compute the author of the given comment element.
+ *
+ * @param post
+ * the comment element
+ *
+ * @return the author
+ */
+ abstract protected String getCommentAuthor(Element post);
+
+ /**
+ * Compute the title of the given comment element.
+ *
+ * @param post
+ * the comment element
+ *
+ * @return the title
+ */
+ abstract protected String getCommentTitle(Element post);
+
+ /**
+ * Compute the date of the given comment element.
+ *
+ * @param post
+ * the comment element
+ *
+ * @return the date
+ */
+ abstract protected String getCommentDate(Element post);
+
+ /**
+ * Get the main of the given comment element, which can be NULL.
+ *
+ * @param post
+ * the comment element
+ *
+ * @return the element
+ */
+ abstract protected Element getCommentContentElement(Element post);
+
+ /**
+ * The {@link ElementProcessor} to use to convert the main comment element
+ * (see {@link BasicSupport#getCommentContentElement(Element)}) into text.
+ * <p>
+ * See {@link BasicElementProcessor} for a working, basic implementation.
+ * <p>
+ * Can be NULL to simply use {@link Element#text()}.
+ *
+ * @return the processor
+ */
+ abstract protected ElementProcessor getElementProcessorComment();
+
+ /**
+ * The support type.
+ *
+ * @param type
+ * the new type
+ */