- Element contentE = post.getElementsByClass("body").first();
- if (contentE != null) {
- for (String line : toLines(contentE,
- new BasicElementProcessor() {
- @Override
- public boolean ignoreNode(Node node) {
- // TODO: ignore headlines/pub
- if (node instanceof Element) {
- Element el = (Element)node;
- if ("h4".equals(el.tagName())) {
- return true;
- }
- }
-
- return false;
- }
- })) {
- content.add(line);
+ return "";
+ }
+
+ @Override
+ protected Element getFullArticle(Document doc) {
+ return doc.getElementById("body"); // the full article is the element whose id is "body"; the lookup result is returned as-is, with no check here
+ }
+
+ @Override
+ protected List<Element> getFullArticleCommentPosts(Document doc, URL intUrl) {
+ List<Element> commentElements = new ArrayList<Element>();
+
+ // Get comments URL then parse it
+ try {
+ URL url = new URL("https://forums.theregister.co.uk/forum/1"
+ + intUrl.getPath());
+ InputStream in = downloader.open(url);
+ try {
+ doc = DataUtil.load(in, "UTF-8", url.toString());
+ Element posts = doc.getElementById("forum_posts");
+ if (posts != null) {
+ for (Element post : posts.getElementsByClass("post")) {
+ commentElements.add(post);
+ Element inReplyTo = post.getElementsByClass(
+ "in-reply-to").first();
+ if (inReplyTo != null) {
+ String parentId = inReplyTo.absUrl("href");
+ if (parentId != null && parentId.contains("/")) {
+ int i = parentId.lastIndexOf('/');
+ parentId = parentId.substring(i + 1);
+
+ commentReplies
+ .put(getCommentId(post), parentId);
+ }