First version (slashdot supported)
authorNiki Roo <niki@nikiroo.be>
Sat, 5 Aug 2017 21:53:59 +0000 (23:53 +0200)
committerNiki Roo <niki@nikiroo.be>
Sat, 5 Aug 2017 21:53:59 +0000 (23:53 +0200)
17 files changed:
Makefile.base [new file with mode: 0644]
README.md [new file with mode: 0644]
VERSION [new file with mode: 0644]
changelog.md [new file with mode: 0644]
configure.sh [new file with mode: 0755]
libs/jsoup-1.10.3-sources.jar [new file with mode: 0644]
libs/nikiroo-utils-2.1.0-sources.jar [new file with mode: 0644]
src/be/nikiroo/gofetch/Fetcher.java [new file with mode: 0644]
src/be/nikiroo/gofetch/Main.java [new file with mode: 0644]
src/be/nikiroo/gofetch/StringJustifier.java [new file with mode: 0644]
src/be/nikiroo/gofetch/data/Comment.java [new file with mode: 0644]
src/be/nikiroo/gofetch/data/Story.java [new file with mode: 0644]
src/be/nikiroo/gofetch/output/Gopher.java [new file with mode: 0644]
src/be/nikiroo/gofetch/output/Html.java [new file with mode: 0644]
src/be/nikiroo/gofetch/output/Output.java [new file with mode: 0644]
src/be/nikiroo/gofetch/support/BasicSupport.java [new file with mode: 0644]
src/be/nikiroo/gofetch/support/Slashdot.java [new file with mode: 0644]

diff --git a/Makefile.base b/Makefile.base
new file mode 100644 (file)
index 0000000..3304c83
--- /dev/null
@@ -0,0 +1,163 @@
+# Generic Makefile body for Java projects.
+#
+# It is meant to be appended to a generated header that defines the
+# project-specific variables listed below.
+#
+# Required parameters (the commented out ones are supposed to change per project):
+
+#MAIN = path to main java source to compile
+#MORE = path to supplementary needed resources not linked from MAIN
+#NAME = name of project (used for jar output file)
+#PREFIX = usually /usr/local (where to install the program)
+#TEST = path to main test source to compile
+#JAR_FLAGS += a list of things to pack, each usually prefixed with "-C bin/"
+#SJAR_FLAGS += a list of things to pack, each usually prefixed with "-C src/", for *-sources.jar files
+#TEST_PARAMS = any parameter to pass to the test runnable when "run-test"
+
+JAVAC = javac
+JAVAC_FLAGS += -encoding UTF-8 -d ./bin/ -cp ./src/
+JAVA = java
+JAVA_FLAGS += -cp ./bin/
+JAR = jar
+RJAR = java
+RJAR_FLAGS += -jar
+
+# Usual options:
+#      make            : to build the jar file
+#      make libs       : to update the libraries into src/
+#      make build      : to update the binaries (not the jar)
+#      make test       : to update the test binaries
+#      make build jar  : to update the binaries and jar file
+#      make clean      : to clean the directory of intermediate files
+#      make mrpropre   : to clean the directory of all outputs
+#      make run        : to run the program from the binaries
+#      make run-test   : to run the test program from the binaries
+#      make jrun       : to run the program from the jar file
+#      make install    : to install the application into $PREFIX
+
+# Note: build is actually slower than rebuild in most cases except when
+# small changes only are detected ; so we use rebuild by default
+
+all: build jar
+
+# Every target that is a command rather than a file must be listed here,
+# otherwise a file with the same name would silently disable it
+.PHONY: all clean mrproper mrpropre build test run run-test jrun jar resources test-resources install libs love
+
+# Output directory for the compiled classes and copied resources
+bin:
+       @mkdir -p bin
+
+jar: $(NAME).jar
+
+# Compile MAIN, then each of the MORE sources (if any)
+build: resources
+       @echo Compiling program...
+       @echo " src/$(MAIN)"
+       @$(JAVAC) $(JAVAC_FLAGS) "src/$(MAIN).java"
+       @[ "$(MORE)" = "" ] || for sup in $(MORE); do \
+               echo "  src/$$sup" ;\
+               $(JAVAC) $(JAVAC_FLAGS) "src/$$sup.java" ; \
+       done
+
+# Compile the TEST sources (requires the program to be built first)
+test: test-resources
+       @[ -e bin/$(MAIN).class ] || echo You need to build the sources
+       @[ -e bin/$(MAIN).class ]
+       @echo Compiling test program...
+       @[ "$(TEST)" != "" ] || echo No test sources defined.
+       @[ "$(TEST)"  = "" ] || for sup in $(TEST); do \
+               echo "  src/$$sup" ;\
+               $(JAVAC) $(JAVAC_FLAGS) "src/$$sup.java" ; \
+       done
+
+# Remove bin/ and every file/directory that was extracted from the
+# libs/*-sources jars into src/ (files first, then the emptied directories
+# in reverse order)
+clean:
+       rm -rf bin/
+       @echo Removing sources taken from libs...
+       @for lib in libs/*-sources.jar libs/*-sources.patch.jar; do \
+               if [ "$$lib" != 'libs/*-sources.jar' -a "$$lib" != 'libs/*-sources.patch.jar' ]; then \
+                       basename "$$lib"; \
+                       $(JAR) tf "$$lib" | while read -r ln; do \
+                               [ -f "src/$$ln" ] && rm "src/$$ln"; \
+                       done; \
+                       $(JAR) tf "$$lib" | tac | while read -r ln; do \
+                               [ -d "src/$$ln" ] && rmdir "src/$$ln" 2>/dev/null || true; \
+                       done; \
+               fi \
+       done
+
+mrproper: mrpropre
+
+# Same as clean, plus the generated jar files
+mrpropre: clean
+       rm -f $(NAME).jar
+       rm -f $(NAME)-sources.jar
+       [ ! -e VERSION ] || rm -f "$(NAME)-`cat VERSION`.jar"
+       [ ! -e VERSION ] || rm -f "$(NAME)-`cat VERSION`-sources.jar"
+
+love:
+       @echo " ...not war."
+
+# Copy every non-java, non-test file from src/ into bin/, plus VERSION
+# (VERSION is optional, as in the other rules)
+resources: libs
+       @echo Copying resources into bin/...
+       @cd src && find . | grep -v '\.java$$' | grep -v '/test/' | while read -r ln; do \
+               if [ -f "$$ln" ]; then \
+                       dir="`dirname "$$ln"`"; \
+                       mkdir -p "../bin/$$dir" ; \
+                       cp "$$ln" "../bin/$$ln" ; \
+               fi ; \
+       done
+       @[ ! -e VERSION ] || cp VERSION bin/
+
+# Copy every non-java file located under a test/ directory from src/ into bin/
+test-resources: resources
+       @echo Copying test resources into bin/...
+       @cd src && find . | grep -v '\.java$$' | grep '/test/' | while read -r ln; do \
+               if [ -f "$$ln" ]; then \
+                       dir="`dirname "$$ln"`"; \
+                       mkdir -p "../bin/$$dir" ; \
+                       cp "$$ln" "../bin/$$ln" ; \
+               fi ; \
+       done
+
+# Extract the libs/*-sources jars into src/ ; bin/libs is the marker file
+# that prevents doing it again on each run
+libs: bin
+       @[ -e bin/libs -o ! -d libs ] || echo Extracting sources from libs...
+       @[ -e bin/libs -o ! -d libs ] || (cd src && for lib in ../libs/*-sources.jar ../libs/*-sources.patch.jar; do \
+               if [ "$$lib" != '../libs/*-sources.jar' -a "$$lib" != '../libs/*-sources.patch.jar' ]; then \
+                       basename "$$lib"; \
+                       $(JAR) xf "$$lib"; \
+               fi \
+       done )
+       @[ ! -d libs ] || touch bin/libs
+
+# Pack the optional sources jar (empty manifest), then the runnable jar
+# (manifest with Main-Class), and copy both to their versioned names
+$(NAME).jar: resources
+       @[ -e bin/$(MAIN).class ] || echo You need to build the sources
+       @[ -e bin/$(MAIN).class ]
+       @echo Making JAR file...
+       @echo > bin/manifest
+       @[ "$(SJAR_FLAGS)" = "" ] || echo Creating $(NAME)-sources.jar...
+       @[ "$(SJAR_FLAGS)" = "" ] || $(JAR) cfm $(NAME)-sources.jar bin/manifest $(SJAR_FLAGS)
+       @[ "$(SJAR_FLAGS)" = "" ] || [ ! -e VERSION ] || echo Copying to "$(NAME)-`cat VERSION`-sources.jar"...
+       @[ "$(SJAR_FLAGS)" = "" ] || [ ! -e VERSION ] || cp $(NAME)-sources.jar "$(NAME)-`cat VERSION`-sources.jar"
+       @echo "Main-Class: `echo "$(MAIN)" | sed 's:/:.:g'`" > bin/manifest
+       @echo >> bin/manifest
+       $(JAR) cfm $(NAME).jar bin/manifest $(JAR_FLAGS)
+       @[ ! -e VERSION ] || echo Copying to "$(NAME)-`cat VERSION`.jar"...
+       @[ ! -e VERSION ] || cp $(NAME).jar "$(NAME)-`cat VERSION`.jar"
+
+run:
+       @[ -e bin/$(MAIN).class ] || echo You need to build the sources
+       @[ -e bin/$(MAIN).class ]
+       @echo Running "$(NAME)"...
+       $(JAVA) $(JAVA_FLAGS) $(MAIN)
+
+jrun:
+       @[ -e $(NAME).jar ] || echo You need to build the jar
+       @[ -e $(NAME).jar ]
+       @echo Running "$(NAME).jar"...
+       $(RJAR) $(RJAR_FLAGS) $(NAME).jar
+
+run-test:
+       @[ "$(TEST)" = "" -o -e "bin/$(TEST).class" ] || echo You need to build the test sources
+       @[ "$(TEST)" = "" -o -e "bin/$(TEST).class" ]
+       @echo Running tests for "$(NAME)"...
+       @[ "$(TEST)" != "" ] || echo No test sources defined.
+       [ "$(TEST)"  = "" ] || ( clear ; $(JAVA) $(JAVA_FLAGS) $(TEST) $(TEST_PARAMS) )
+
+# Copy the jar into $(PREFIX)/lib and create a launcher script in $(PREFIX)/bin
+install:
+       @[ -e $(NAME).jar ] || echo You need to build the jar
+       @[ -e $(NAME).jar ]
+       mkdir -p "$(PREFIX)/lib" "$(PREFIX)/bin"
+       cp $(NAME).jar "$(PREFIX)/lib/"
+       echo "#!/bin/sh" > "$(PREFIX)/bin/$(NAME)"
+       echo "$(RJAR) $(RJAR_FLAGS) \"$(PREFIX)/lib/$(NAME).jar\" \"\$$@\"" >> "$(PREFIX)/bin/$(NAME)"
+       chmod a+rx "$(PREFIX)/bin/$(NAME)"
+
diff --git a/README.md b/README.md
new file mode 100644 (file)
index 0000000..d7feec7
--- /dev/null
+++ b/README.md
@@ -0,0 +1,39 @@
+# GoFetch
+
+GoFetch is a simple web scraper that outputs gopher-ready files.
+You point it to your gopher directory, you launch it, and you have a
+gopher view of the supported news sites.
+
+## Supported websites
+
+- Slashdot: News for nerds, stuff that matters!
+
+## Supported platforms
+
+Any platform with at least Java 1.6 on it should be ok.
+
+## Usage
+
+```java -jar gofetch.jar [dir] [selector] [type] [max] [hostname] [port]```
+
+- dir: the target directory where to store the files
+- selector: the gopher selector to prepend (also a sub-directory in [dir])
+- type: the supported website to fetch, or ALL to fetch all of them
+- max: the maximum number of stories to show on the main page
+- hostname: the gopher hostname
+- port: the gopher port
+
+## Compilation
+
+```./configure.sh && make```
+
+You can also import the java sources into, say, [Eclipse](https://eclipse.org/), and create a runnable JAR file from there.
+
+### Dependent libraries (included)
+
+- libs/nikiroo-utils-sources.jar: some shared utility functions I also use elsewhere
+- [libs/jsoup-sources.jar](https://jsoup.org/): a nice library to parse HTML
+
+Nothing else but Java 1.6+.
+
+Note that calling ```make libs``` will export the libraries into the src/ directory.
+
diff --git a/VERSION b/VERSION
new file mode 100644 (file)
index 0000000..8acdd82
--- /dev/null
+++ b/VERSION
@@ -0,0 +1 @@
+0.0.1
diff --git a/changelog.md b/changelog.md
new file mode 100644 (file)
index 0000000..81e28f3
--- /dev/null
@@ -0,0 +1,7 @@
+# GoFetch
+
+## Version 0.0.1
+
+- First version
+- Slashdot supported
+
diff --git a/configure.sh b/configure.sh
new file mode 100755 (executable)
index 0000000..b2317b2
--- /dev/null
@@ -0,0 +1,68 @@
+#!/bin/sh
+
+# default:
+PREFIX=/usr/local
+PROGS="java javac jar make sed"
+
+valid=true
+while [ "$*" != "" ]; do
+       key=`echo "$1" | cut -f1 -d=`
+       val=`echo "$1" | cut -f2 -d=`
+       case "$key" in
+       --)
+       ;;
+       --help) #               This help message
+               echo The following arguments can be used:
+               cat "$0" | grep '^\s*--' | grep '#' | while read ln; do
+                       cmd=`echo "$ln" | cut -f1 -d')'`
+                       msg=`echo "$ln" | cut -f2 -d'#'`
+                       echo "  $cmd$msg"
+               done
+       ;;
+       --prefix) #=PATH        Change the prefix to the given path
+               PREFIX="$val"
+       ;;
+       *)
+               echo "Unsupported parameter: '$1'" >&2
+               echo >&2
+               sh "$0" --help >&2
+               valid=false
+       ;;
+       esac
+       shift
+done
+
+[ $valid = false ] && exit 1
+
+MESS="A required program cannot be found:"
+for prog in $PROGS; do
+       out="`whereis -b "$prog" 2>/dev/null`"
+       if [ "$out" = "$prog:" ]; then
+               echo "$MESS $prog" >&2
+               valid=false
+       fi
+done
+
+[ $valid = false ] && exit 2
+
+if [ "`whereis tput`" = "tput:" ]; then
+       ok='"[ ok ]"';
+       ko='"[ !! ]"';
+       cols=80;
+else
+       ok='"`tput bold`[`tput setf 2` OK `tput init``tput bold`]`tput init`"';
+       ko='"`tput bold`[`tput setf 4` !! `tput init``tput bold`]`tput init`"';
+       cols='"`tput cols`"';
+fi;
+
+echo "MAIN = be/nikiroo/gofetch/Main" > Makefile
+echo "MORE = " >> Makefile
+echo "TEST = " >> Makefile
+echo "TEST_PARAMS = $cols $ok $ko" >> Makefile
+echo "NAME = gofetch" >> Makefile
+echo "PREFIX = $PREFIX" >> Makefile
+echo "JAR_FLAGS += -C bin/ org -C bin/ be -C bin/ VERSION" >> Makefile
+#echo "SJAR_FLAGS += -C src/ org -C src/ jexer -C src/ be -C ./ LICENSE -C ./ README.md -C ./ VERSION" >> Makefile
+
+cat Makefile.base >> Makefile
+
diff --git a/libs/jsoup-1.10.3-sources.jar b/libs/jsoup-1.10.3-sources.jar
new file mode 100644 (file)
index 0000000..1fe0db4
Binary files /dev/null and b/libs/jsoup-1.10.3-sources.jar differ
diff --git a/libs/nikiroo-utils-2.1.0-sources.jar b/libs/nikiroo-utils-2.1.0-sources.jar
new file mode 100644 (file)
index 0000000..e7975f4
Binary files /dev/null and b/libs/nikiroo-utils-2.1.0-sources.jar differ
diff --git a/src/be/nikiroo/gofetch/Fetcher.java b/src/be/nikiroo/gofetch/Fetcher.java
new file mode 100644 (file)
index 0000000..5a9c774
--- /dev/null
@@ -0,0 +1,199 @@
+package be.nikiroo.gofetch;
+
+import java.io.File;
+import java.io.FileWriter;
+import java.io.FilenameFilter;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.List;
+
+import be.nikiroo.gofetch.data.Comment;
+import be.nikiroo.gofetch.data.Story;
+import be.nikiroo.gofetch.output.Gopher;
+import be.nikiroo.gofetch.output.Html;
+import be.nikiroo.gofetch.output.Output;
+import be.nikiroo.gofetch.support.BasicSupport;
+import be.nikiroo.gofetch.support.BasicSupport.Type;
+import be.nikiroo.utils.IOUtils;
+
+/**
+ * The class that will manage the fetch operations.
+ * <p>
+ * It will scrap the required websites and process them to disk.
+ * 
+ * @author niki
+ */
+public class Fetcher {
+       private File dir;
+       private String preselector;
+       private int maxStories;
+       private String hostname;
+       private int port;
+       private Type type;
+
+       /**
+        * Prepare a new {@link Fetcher}.
+        * 
+        * @param dir
+        *            the target directory where to save the files (won't have
+        *            impact on the files' content)
+        * @param preselector
+        *            the sub directory and (pre-)selector to use for the resources
+        *            (<b>will</b> have an impact on the files' content)
+        * @param type
+        *            the type of news to get (or the special keyword ALL to get all
+        *            of the supported sources)
+        * @param maxStories
+        *            the maximum number of stories to show on the resume page
+        * @param hostname
+        *            the gopher host to use (<b>will</b> have an impact on the
+        *            files' content)
+        * @param port
+        *            the gopher port to use (<b>will</b> have an impact on the
+        *            files' content)
+        */
+       public Fetcher(File dir, String preselector, Type type, int maxStories,
+                       String hostname, int port) {
+               this.dir = dir;
+               this.preselector = preselector;
+               this.type = type;
+               this.maxStories = maxStories;
+               this.hostname = hostname;
+               this.port = port;
+       }
+
+       /**
+        * Start the fetching operation.
+        * <p>
+        * This method will handle the main pages itself, and will call
+        * {@link Fetcher#list(BasicSupport)} for the stories.
+        * 
+        * @throws IOException
+        *             in case of I/O error
+        */
+       public void start() throws IOException {
+               File cache = new File(dir, preselector);
+               cache.mkdirs();
+               File cacheHtml = new File(cache, "index.html");
+               cache = new File(cache, ".cache");
+
+               Output gopher = new Gopher(null, hostname, port);
+               Output html = new Html(null);
+
+               FileWriter writer = new FileWriter(cache);
+               try {
+                       FileWriter writerHtml = new FileWriter(cacheHtml);
+                       try {
+                               writer.append(gopher.getIndexHeader());
+                               writerHtml.append(html.getIndexHeader());
+
+                               Type types[];
+                               if (type == null) {
+                                       types = Type.values();
+                               } else {
+                                       types = new Type[] { type };
+                               }
+
+                               BasicSupport.setPreselector(preselector);
+                               for (Type type : types) {
+                                       BasicSupport support = BasicSupport.getSupport(type);
+                                       list(support);
+
+                                       writer.append("1" + support.getDescription()).append("\t")
+                                                       .append("1" + support.getSelector()) //
+                                                       .append("\t").append(hostname) //
+                                                       .append("\t").append(Integer.toString(port)) //
+                                                       .append("\r\n");
+                                       String ref = support.getSelector();
+                                       while (ref.startsWith("/")) {
+                                               ref = ref.substring(1);
+                                       }
+                                       writerHtml.append("<div class='site'><a href='../" + ref
+                                                       + "'>" + support.getDescription() + "</a></div>");
+                               }
+
+                               writer.append(gopher.getIndexFooter());
+                               writerHtml.append(html.getIndexFooter());
+                       } finally {
+                               writerHtml.close();
+                       }
+               } finally {
+                       writer.close();
+               }
+       }
+
+       /**
+        * Process the stories for the given {@link BasicSupport} to disk.
+        * 
+        * @param support
+        *            the {@link BasicSupport} to download from
+        * 
+        * @throws IOException
+        *             in case of I/O error
+        **/
+       private void list(BasicSupport support) throws IOException {
+               Output gopher = new Gopher(support.getType(), hostname, port);
+               Output html = new Html(support.getType());
+
+               new File(dir, support.getSelector()).mkdirs();
+
+               System.err
+                               .print("Listing recent news for " + support.getType() + "...");
+               List<Story> stories = support.list();
+               System.err.println(" " + stories.size() + " stories found!");
+               int i = 1;
+               for (Story story : stories) {
+                       IOUtils.writeSmallFile(dir, story.getSelector() + ".header",
+                                       gopher.export(story));
+                       IOUtils.writeSmallFile(dir, story.getSelector() + ".header.html",
+                                       html.export(story));
+
+                       System.err.println(String.format("%02d/%02d", i, stories.size())
+                                       + " Fetching comments for story " + story.getId() + "...");
+                       List<Comment> comments = support.getComments(story);
+
+                       IOUtils.writeSmallFile(dir, story.getSelector(),
+                                       gopher.export(story, comments));
+                       IOUtils.writeSmallFile(dir, story.getSelector() + ".html",
+                                       html.export(story, comments));
+
+                       i++;
+               }
+
+               File varDir = new File(dir, support.getSelector());
+               String[] headers = varDir.list(new FilenameFilter() {
+                       @Override
+                       public boolean accept(File dir, String name) {
+                               return name.endsWith(".header");
+                       }
+               });
+
+               File cache = new File(varDir, ".cache");
+               File cacheHtml = new File(varDir, "index.html");
+               FileWriter writer = new FileWriter(cache);
+               try {
+                       FileWriter writerHtml = new FileWriter(cacheHtml);
+                       try {
+                               if (headers.length > 0) {
+                                       Arrays.sort(headers);
+                                       int from = headers.length - 1;
+                                       int to = headers.length - maxStories;
+                                       if (to < 0) {
+                                               to = 0;
+                                       }
+                                       for (i = from; i >= to; i--) {
+                                               writer.append(IOUtils.readSmallFile(new File(varDir,
+                                                               headers[i])));
+
+                                               writerHtml.append(IOUtils.readSmallFile(new File(
+                                                               varDir, headers[i] + ".html")));
+                                       }
+                               }
+                       } finally {
+                               writerHtml.close();
+                       }
+               } finally {
+                       writer.close();
+               }
+       }
+}
diff --git a/src/be/nikiroo/gofetch/Main.java b/src/be/nikiroo/gofetch/Main.java
new file mode 100644 (file)
index 0000000..590d4a0
--- /dev/null
@@ -0,0 +1,105 @@
+package be.nikiroo.gofetch;
+
+import java.io.File;
+import java.io.IOException;
+
+import be.nikiroo.gofetch.support.BasicSupport.Type;
+
+/**
+ * This class is tha main entry point of the program. It will parse the
+ * arguments, checks them (and warn-and-exit if they are invalid) then call
+ * {@link Fetcher#start()}.
+ * 
+ * @author niki
+ */
+public class Main {
+       /**
+        * Main entry point.
+        * 
+        * @param args
+        *            save-to-dir selector-subdir type max hostname port
+        * 
+        * @throws IOException
+        *             in case of I/O error
+        */
+       public static void main(String[] args) throws IOException {
+               if (args.length < 6) {
+                       System.err
+                                       .println("Syntax error: gofecth [target dir] [selector] [type or 'ALL'] [max stories] [hostname] [port]");
+                       System.exit(1);
+               }
+
+               String dirStr = args[0];
+               String preselectorStr = args[1];
+               String typeStr = args[2];
+               String maxStoriesStr = args[3];
+               String hostnameStr = args[4];
+               String portStr = args[5];
+
+               // Dir
+               File dir = new File(dirStr);
+               dir.mkdirs();
+
+               if (!dir.exists()) {
+                       System.err.println("Cannot open/create the root directory: "
+                                       + dirStr);
+                       System.exit(1);
+               }
+
+               if (dir.isFile()) {
+                       System.err
+                                       .println("Root directory exists and is a file: " + dirStr);
+                       System.exit(1);
+               }
+
+               // Selector base (empty is ok, DO NOT end with /)
+               String preselector = "";
+               if (preselectorStr != null && !preselectorStr.startsWith("/")) {
+                       preselector = "/" + preselectorStr;
+               }
+               while (preselector.endsWith("/")) {
+                       preselector = preselector.substring(0, preselector.length() - 1);
+               }
+
+               // Type to download
+               Type type = null;
+               if (!"ALL".equals(typeStr)) {
+                       try {
+                               Type.valueOf(typeStr.toUpperCase());
+                       } catch (IllegalArgumentException e) {
+                               System.err.println("Invalid type: " + typeStr);
+                               System.exit(1);
+                       }
+               }
+
+               // Max number of stories to display in the cache
+               int maxStories = 0;
+               try {
+                       maxStories = Integer.parseInt(maxStoriesStr);
+               } catch (NumberFormatException e) {
+                       System.err
+                                       .println("The maximum number of stories cannot be parsed: "
+                                                       + maxStoriesStr);
+                       System.exit(1);
+               }
+
+               //
+               String hostname = hostnameStr;
+
+               //
+               int port = 0;
+               try {
+                       port = Integer.parseInt(portStr);
+               } catch (NumberFormatException e) {
+                       System.err.println("The port cannot be parsed: " + portStr);
+                       System.exit(1);
+               }
+
+               if (port < 0 || port > 65535) {
+                       System.err.println("Invalid port number: " + portStr);
+                       System.exit(1);
+               }
+
+               new Fetcher(dir, preselector, type, maxStories, hostname, port).start();
+       }
+}
\ No newline at end of file
diff --git a/src/be/nikiroo/gofetch/StringJustifier.java b/src/be/nikiroo/gofetch/StringJustifier.java
new file mode 100644 (file)
index 0000000..e9e6579
--- /dev/null
@@ -0,0 +1,244 @@
+/*
+ * This file was taken from:
+ * Jexer - Java Text User Interface
+ *
+ * The MIT License (MIT)
+ *
+ * Copyright (C) 2017 Kevin Lamonte
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * @author Kevin Lamonte [kevin.lamonte@gmail.com]
+ * @version 1
+ */
+package be.nikiroo.gofetch;
+
+import java.util.LinkedList;
+import java.util.List;
+
+/**
+ * StringJustifier contains methods to convert one or more long lines of strings
+ * into justified text paragraphs.
+ */
+public class StringJustifier {
+
+       /**
+        * Left-justify a string into a list of lines.
+        * 
+        * @param str
+        *            the string
+        * @param n
+        *            the maximum number of characters in a line
+        * @return the list of lines
+        */
+       public static List<String> left(final String str, final int n) {
+               List<String> result = new LinkedList<String>();
+
+               /*
+                * General procedure:
+                * 
+                * 1. Split on '\n' into paragraphs.
+                * 
+                * 2. Scan each line, noting the position of the last
+                * beginning-of-a-word.
+                * 
+                * 3. Chop at the last #2 if the next beginning-of-a-word exceeds n.
+                * 
+                * 4. Return the lines.
+                */
+
+               String[] rawLines = str.split("\n");
+               for (int i = 0; i < rawLines.length; i++) {
+                       StringBuilder line = new StringBuilder();
+                       StringBuilder word = new StringBuilder();
+                       boolean inWord = false;
+                       for (int j = 0; j < rawLines[i].length(); j++) {
+                               char ch = rawLines[i].charAt(j);
+                               if ((ch == ' ') || (ch == '\t')) {
+                                       if (inWord == true) {
+                                               // We have just transitioned from a word to
+                                               // whitespace. See if we have enough space to add
+                                               // the word to the line.
+                                               if (word.length() + line.length() > n) {
+                                                       // This word will exceed the line length. Wrap
+                                                       // at it instead.
+                                                       result.add(line.toString());
+                                                       line = new StringBuilder();
+                                               }
+                                               if ((word.toString().startsWith(" "))
+                                                               && (line.length() == 0)) {
+                                                       line.append(word.substring(1));
+                                               } else {
+                                                       line.append(word);
+                                               }
+                                               word = new StringBuilder();
+                                               word.append(ch);
+                                               inWord = false;
+                                       } else {
+                                               // We are in the whitespace before another word. Do
+                                               // nothing.
+                                       }
+                               } else {
+                                       if (inWord == true) {
+                                               // We are appending to a word.
+                                               word.append(ch);
+                                       } else {
+                                               // We have transitioned from whitespace to a word.
+                                               word.append(ch);
+                                               inWord = true;
+                                       }
+                               }
+                       } // for (int j = 0; j < rawLines[i].length(); j++)
+
+                       if (word.length() + line.length() > n) {
+                               // This word will exceed the line length. Wrap at it
+                               // instead.
+                               result.add(line.toString());
+                               line = new StringBuilder();
+                       }
+                       if ((word.toString().startsWith(" ")) && (line.length() == 0)) {
+                               line.append(word.substring(1));
+                       } else {
+                               line.append(word);
+                       }
+                       result.add(line.toString());
+               } // for (int i = 0; i < rawLines.length; i++) {
+
+               return result;
+       }
+
+       /**
+        * Right-justify a string into a list of lines.
+        * <p>
+        * The text is wrapped by <tt>left()</tt> first; each resulting line that
+        * is shorter than <tt>n</tt> is then prefixed with spaces so that it is
+        * exactly <tt>n</tt> characters long. A line that already reaches
+        * <tt>n</tt> characters is returned without any padding.
+        * 
+        * @param str
+        *            the string
+        * @param n
+        *            the maximum number of characters in a line
+        * @return the list of lines
+        */
+       public static List<String> right(final String str, final int n) {
+               List<String> justified = new LinkedList<String>();
+
+               for (String wrapped : left(str, n)) {
+                       // Number of spaces needed in front of the text (zero or
+                       // negative when the line already fills the width).
+                       int missing = n - wrapped.length();
+
+                       StringBuilder padded = new StringBuilder();
+                       while (missing > 0) {
+                               padded.append(' ');
+                               missing--;
+                       }
+                       padded.append(wrapped);
+
+                       justified.add(padded.toString());
+               }
+
+               return justified;
+       }
+
+       /**
+        * Center a string into a list of lines.
+        * <p>
+        * The text is wrapped by <tt>left()</tt> first; each resulting line is
+        * then surrounded by spaces so that it is <tt>n</tt> characters long.
+        * When the number of spaces to add is odd, the extra space is placed on
+        * the right side.
+        * 
+        * @param str
+        *            the string
+        * @param n
+        *            the maximum number of characters in a line
+        * @return the list of lines
+        */
+       public static List<String> center(final String str, final int n) {
+               List<String> centered = new LinkedList<String>();
+
+               for (String wrapped : left(str, n)) {
+                       // Split the free room in two; the integer division leaves the
+                       // odd space (if any) to the right-hand side.
+                       int free = n - wrapped.length();
+                       int before = free / 2;
+                       int after = free - before;
+
+                       StringBuilder padded = new StringBuilder();
+                       while (before > 0) {
+                               padded.append(' ');
+                               before--;
+                       }
+                       padded.append(wrapped);
+                       while (after > 0) {
+                               padded.append(' ');
+                               after--;
+                       }
+
+                       centered.add(padded.toString());
+               }
+
+               return centered;
+       }
+
+       /**
+        * Fully-justify a string into a list of lines.
+        * <p>
+        * The text is wrapped by <tt>left()</tt> first. Every line but the last
+        * one is then split on single spaces and rebuilt with the spaces
+        * redistributed between the words so that the line aims at <tt>n</tt>
+        * characters; when the spaces cannot be shared evenly, the leftmost gaps
+        * receive one more space. Lines without at least two words, and the very
+        * last line, are returned as wrapped.
+        * 
+        * @param str
+        *            the string
+        * @param n
+        *            the maximum number of characters in a line
+        * @return the list of lines
+        */
+       public static List<String> full(final String str, final int n) {
+               List<String> justified = new LinkedList<String>();
+
+               List<String> wrapped = left(str, n);
+               int lastIndex = wrapped.size() - 1;
+
+               for (int lineI = 0; lineI < lastIndex; lineI++) {
+                       String line = wrapped.get(lineI);
+                       String[] words = line.split(" ");
+                       int gaps = words.length - 1;
+
+                       if (gaps < 1) {
+                               // No gap between words to stretch: keep the line as-is.
+                               justified.add(line);
+                               continue;
+                       }
+
+                       int charCount = 0;
+                       for (String word : words) {
+                               charCount += word.length();
+                       }
+
+                       // Spaces to hand out: "each" per gap, plus one more for the
+                       // first "extra" gaps.
+                       int spaceCount = n - charCount;
+                       int each = spaceCount / gaps;
+                       int extra = spaceCount % gaps;
+
+                       StringBuilder sb = new StringBuilder();
+                       for (int i = 0; i < gaps; i++) {
+                               sb.append(words[i]);
+                               int width = (i < extra) ? each + 1 : each;
+                               for (int j = 0; j < width; j++) {
+                                       sb.append(' ');
+                               }
+                       }
+                       sb.append(words[gaps]);
+
+                       justified.add(sb.toString());
+               }
+
+               // The last line is never stretched.
+               if (lastIndex >= 0) {
+                       justified.add(wrapped.get(lastIndex));
+               }
+
+               return justified;
+       }
+}
diff --git a/src/be/nikiroo/gofetch/data/Comment.java b/src/be/nikiroo/gofetch/data/Comment.java
new file mode 100644 (file)
index 0000000..44c0de1
--- /dev/null
@@ -0,0 +1,77 @@
+package be.nikiroo.gofetch.data;
+
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * A comment, together with the comments that were added under it.
+ * <p>
+ * Iterating over a {@link Comment} goes through its direct children only.
+ */
+public class Comment implements Iterable<Comment> {
+       private final String id;
+       private final String author;
+       private final String title;
+       private final String date;
+       private final String content;
+       private final List<Comment> children;
+
+       /**
+        * Create a new comment without any child.
+        * 
+        * @param id
+        *            the comment ID
+        * @param author
+        *            the author of the comment
+        * @param title
+        *            the title of the comment
+        * @param date
+        *            the date of the comment
+        * @param content
+        *            the content of the comment
+        */
+       public Comment(String id, String author, String title, String date,
+                       String content) {
+               this.id = id;
+               this.author = author;
+               this.title = title;
+               this.date = date;
+               this.content = content;
+               this.children = new ArrayList<Comment>();
+       }
+
+       /**
+        * Add a child comment under this one.
+        * 
+        * @param comment
+        *            the child to add
+        */
+       public void add(Comment comment) {
+               children.add(comment);
+       }
+
+       /**
+        * Add all the given comments as children of this one.
+        * 
+        * @param comments
+        *            the children to add
+        */
+       public void addAll(List<Comment> comments) {
+               children.addAll(comments);
+       }
+
+       /**
+        * @return the id
+        */
+       public String getId() {
+               return id;
+       }
+
+       /**
+        * @return the author
+        */
+       public String getAuthor() {
+               return author;
+       }
+
+       /**
+        * @return the title
+        */
+       public String getTitle() {
+               return title;
+       }
+
+       /**
+        * @return the date
+        */
+       public String getDate() {
+               return date;
+       }
+
+       /**
+        * @return the content
+        */
+       public String getContent() {
+               return content;
+       }
+
+       /**
+        * Check if this comment carries no information at all: no children, and
+        * an author, title and content that are all NULL or only made of
+        * whitespace. The id and the date are not taken into account.
+        * 
+        * @return TRUE if the comment is empty
+        */
+       public boolean isEmpty() {
+               // Each field is checked on its own: concatenating them would turn a
+               // NULL field into the text "null" and make the comment look filled.
+               return children.isEmpty() && isBlank(author) && isBlank(title)
+                               && isBlank(content);
+       }
+
+       /**
+        * Check if the given text is NULL or only contains whitespace.
+        * 
+        * @param text
+        *            the text to check
+        * 
+        * @return TRUE if it is blank
+        */
+       private static boolean isBlank(String text) {
+               return text == null || text.trim().isEmpty();
+       }
+
+       @Override
+       public Iterator<Comment> iterator() {
+               return children.iterator();
+       }
+}
diff --git a/src/be/nikiroo/gofetch/data/Story.java b/src/be/nikiroo/gofetch/data/Story.java
new file mode 100644 (file)
index 0000000..aa5aecc
--- /dev/null
@@ -0,0 +1,96 @@
+package be.nikiroo.gofetch.data;
+
+import java.net.URL;
+
+import be.nikiroo.gofetch.support.BasicSupport;
+import be.nikiroo.gofetch.support.BasicSupport.Type;
+
+/**
+ * A news story, as listed by one of the supported news sources.
+ * <p>
+ * This is a simple value holder: all the values are given at construction
+ * time and cannot be changed afterwards.
+ * 
+ * @author niki
+ */
+public class Story {
+       private final Type type;
+       private final String id;
+       private final String title;
+       private final String details;
+       private final String urlInternal;
+       private final String urlExternal;
+       private final String content;
+
+       /**
+        * Create a news story.
+        * 
+        * @param type
+        *            the source {@link Type}
+        * @param id
+        *            the news ID
+        * @param title
+        *            the news title
+        * @param details
+        *            some details to add to the title
+        * @param urlInternal
+        *            the {@link URL} to get this news on the associated news site
+        * @param urlExternal
+        *            an external {@link URL} that serves as the news' source, if
+        *            any
+        * @param content
+        *            the story content
+        */
+       public Story(Type type, String id, String title, String details,
+                       String urlInternal, String urlExternal, String content) {
+               this.type = type;
+               this.id = id;
+               this.title = title;
+               this.details = details;
+               this.urlInternal = urlInternal;
+               this.urlExternal = urlExternal;
+               this.content = content;
+       }
+
+       /**
+        * The selector of this story: the selector of its source {@link Type}
+        * (see {@link BasicSupport#getSelector(Type)}) followed by the story ID.
+        * 
+        * @return the selector
+        */
+       public String getSelector() {
+               String sourceSelector = BasicSupport.getSelector(type);
+               return sourceSelector + id;
+       }
+
+       /**
+        * @return the news ID
+        */
+       public String getId() {
+               return id;
+       }
+
+       /**
+        * @return the news title
+        */
+       public String getTitle() {
+               return title;
+       }
+
+       /**
+        * @return the details to add to the title
+        */
+       public String getDetails() {
+               return details;
+       }
+
+       /**
+        * @return the URL of this news on the associated news site
+        */
+       public String getUrlInternal() {
+               return urlInternal;
+       }
+
+       /**
+        * @return the external URL that serves as the news' source, if any
+        */
+       public String getUrlExternal() {
+               return urlExternal;
+       }
+
+       /**
+        * @return the story content
+        */
+       public String getContent() {
+               return content;
+       }
+}
\ No newline at end of file
diff --git a/src/be/nikiroo/gofetch/output/Gopher.java b/src/be/nikiroo/gofetch/output/Gopher.java
new file mode 100644 (file)
index 0000000..8cce4a5
--- /dev/null
@@ -0,0 +1,131 @@
+package be.nikiroo.gofetch.output;
+
+import java.util.List;
+
+import be.nikiroo.gofetch.StringJustifier;
+import be.nikiroo.gofetch.data.Comment;
+import be.nikiroo.gofetch.data.Story;
+import be.nikiroo.gofetch.support.BasicSupport.Type;
+
+/**
+ * Gopher output: stories and comments are exported as lines terminated by
+ * CRLF.
+ * <p>
+ * Free text is wrapped with {@link StringJustifier} and written on lines
+ * starting with "i"; in the index, the title of each story is written on a
+ * line starting with "1" that carries the story selector, the hostname and
+ * the port, separated by tabs.
+ */
+public class Gopher extends Output {
+       static private final int LINE_SIZE = 70;
+
+       private String hostname;
+       private int port;
+
+       /**
+        * Create a new {@link Gopher} output.
+        * 
+        * @param type
+        *            the type of source (passed as-is to {@link Output})
+        * @param hostname
+        *            the hostname to write in the links to the stories
+        * @param port
+        *            the port to write in the links to the stories
+        */
+       public Gopher(Type type, String hostname, int port) {
+               super(type);
+
+               this.hostname = hostname;
+               this.port = port;
+       }
+
+       @Override
+       public String getIndexHeader() {
+               return "iHello world!\r\niThis is my news site.\r\ni\r\n";
+       }
+
+       @Override
+       public String getIndexFooter() {
+               return "";
+       }
+
+       @Override
+       public String export(Story story) {
+               return append(new StringBuilder(), story, false).append("i\r\ni\r\n")
+                               .toString();
+       }
+
+       @Override
+       public String export(Story story, List<Comment> comments) {
+               StringBuilder builder = new StringBuilder();
+               append(builder, story, true);
+
+               builder.append("i\r\n");
+
+               for (Comment comment : comments) {
+                       append(builder, comment, "");
+               }
+
+               builder.append("i\r\n");
+
+               return builder.toString();
+       }
+
+       /**
+        * Append a comment and, recursively, all its children (each level is
+        * indented by 3 more spaces than its parent).
+        * 
+        * @param builder
+        *            the builder to append to
+        * @param comment
+        *            the comment to append
+        * @param space
+        *            the indentation to use for this comment
+        * 
+        * @return the same builder
+        */
+       private StringBuilder append(StringBuilder builder, Comment comment,
+                       String space) {
+
+               // Cap the indentation so deeply nested comments keep at least 20
+               // columns of text.
+               if (space.length() > LINE_SIZE - 20) {
+                       space = space.substring(0, LINE_SIZE - 20);
+               }
+
+               appendLeft(builder, comment.getTitle(), ">> ", "   ", space);
+               appendLeft(builder, "(" + comment.getAuthor() + ")", "   ", "   ",
+                               space);
+
+               builder.append("i\r\n");
+
+               appendLeft(builder, comment.getContent(), "   ", "   ", space);
+
+               builder.append("i\r\n");
+               for (Comment subComment : comment) {
+                       append(builder, subComment, space + "   ");
+                       builder.append("i\r\n");
+               }
+
+               return builder;
+       }
+
+       /**
+        * Append a story.
+        * 
+        * @param builder
+        *            the builder to append to
+        * @param story
+        *            the story to append
+        * @param links
+        *            TRUE for the full story mode (centered title, then the news
+        *            and source URLs), FALSE for the index mode (the title is a
+        *            link to the story)
+        * 
+        * @return the same builder
+        */
+       private StringBuilder append(StringBuilder builder, Story story,
+                       boolean links) {
+               if (links) {
+                       appendCenter(builder, story.getTitle(), true);
+                       builder.append("i\r\n");
+                       appendLeft(builder, story.getDetails(), "", "", "  ");
+                       builder.append("i\r\n");
+                       appendLink(builder, "News link", story.getUrlInternal());
+                       appendLink(builder, "Source link", story.getUrlExternal());
+               } else {
+                       builder.append('1').append(story.getTitle()) //
+                                       .append('\t').append("0").append(story.getSelector()) //
+                                       .append('\t').append(hostname) //
+                                       .append('\t').append(port) //
+                                       .append("\r\n");
+                       appendLeft(builder, story.getDetails(), "", "", "  ");
+               }
+
+               builder.append("i\r\n");
+
+               appendLeft(builder, story.getContent(), "", "", "    ");
+
+               builder.append("i\r\n");
+
+               return builder;
+       }
+
+       /**
+        * Append a "label: url" line, or nothing at all if there is no URL.
+        * <p>
+        * The line is ended by CRLF only, so that nothing gets glued to the end
+        * of the URL.
+        * 
+        * @param builder
+        *            the builder to append to
+        * @param label
+        *            the text to display before the URL
+        * @param url
+        *            the URL, which can be NULL or empty (nothing is appended
+        *            then)
+        */
+       private static void appendLink(StringBuilder builder, String label,
+                       String url) {
+               if (url == null || url.isEmpty()) {
+                       return;
+               }
+
+               builder.append("i  o ").append(label).append(": ").append(url)
+                               .append("\r\n");
+       }
+
+       // note: adds "i"
+       private static void appendCenter(StringBuilder builder, String text,
+                       boolean allCaps) {
+               if (allCaps) {
+                       text = text.toUpperCase();
+               }
+
+               for (String line : StringJustifier.center(text, LINE_SIZE)) {
+                       builder.append("i").append(line).append("\r\n");
+               }
+       }
+
+       // note: adds "i"
+       // The first wrapped line is prefixed by prependFirst, the following ones
+       // by prependOthers; the wrapping width is reduced by the size of space.
+       private static void appendLeft(StringBuilder builder, String text,
+                       String prependFirst, String prependOthers, String space) {
+               String prepend = prependFirst;
+               for (String line : StringJustifier.left(text,
+                               LINE_SIZE - space.length())) {
+                       builder.append("i").append(space).append(prepend).append(line)
+                                       .append("\r\n");
+                       prepend = prependOthers;
+               }
+       }
+}
diff --git a/src/be/nikiroo/gofetch/output/Html.java b/src/be/nikiroo/gofetch/output/Html.java
new file mode 100644 (file)
index 0000000..ab7489b
--- /dev/null
@@ -0,0 +1,81 @@
+package be.nikiroo.gofetch.output;
+
+import java.util.List;
+
+import be.nikiroo.gofetch.data.Comment;
+import be.nikiroo.gofetch.data.Story;
+import be.nikiroo.gofetch.support.BasicSupport.Type;
+
+/**
+ * HTML output: stories and comments are exported as HTML fragments.
+ * <p>
+ * All the values taken from the stories and comments are plain text, so they
+ * are escaped before being inserted in the markup.
+ */
+public class Html extends Output {
+       /**
+        * Create a new {@link Html} output.
+        * 
+        * @param type
+        *            the type of source (passed as-is to {@link Output})
+        */
+       public Html(Type type) {
+               super(type);
+       }
+
+       @Override
+       public String getIndexHeader() {
+               return "<h1>Hello world!</h1><br/>TODO\n";
+       }
+
+       @Override
+       public String getIndexFooter() {
+               return "";
+       }
+
+       @Override
+       public String export(Story story) {
+               return appendHtml(new StringBuilder(), story, true).append("<hr/>\n")
+                               .toString();
+       }
+
+       @Override
+       public String export(Story story, List<Comment> comments) {
+               StringBuilder builder = new StringBuilder();
+               appendHtml(builder, story, false);
+
+               // TODO: ext link and link
+
+               builder.append("<hr/>");
+               for (Comment comment : comments) {
+                       appendHtml(builder, comment, "  ");
+               }
+
+               return builder.toString();
+       }
+
+       /**
+        * Append a comment and, recursively, all its children (nested inside the
+        * block of their parent).
+        * 
+        * @param builder
+        *            the builder to append to
+        * @param comment
+        *            the comment to append
+        * @param space
+        *            the indentation to use; its size also drives the left margin
+        *            of the block (20px per character)
+        */
+       private void appendHtml(StringBuilder builder, Comment comment, String space) {
+               builder.append(space).append(
+                               "<div class='comment' style='display: block; margin-left: "
+                                               + (20 * space.length()) + "px'>");
+               builder.append(space).append("  <h2>")
+                               .append(escape(comment.getTitle())).append("</h2>\n");
+               builder.append(space).append("  <div class='by'>")
+                               .append(escape(comment.getAuthor())).append("</div>\n");
+               builder.append(space).append("  <div class='comment_content'>")
+                               .append(escape(comment.getContent())).append("</div>\n");
+               for (Comment subComment : comment) {
+                       appendHtml(builder, subComment, space + "  ");
+               }
+               builder.append(space).append("</div>");
+       }
+
+       /**
+        * Append a story.
+        * 
+        * @param builder
+        *            the builder to append to
+        * @param story
+        *            the story to append
+        * @param links
+        *            TRUE to make the title a link to "[story id].html"
+        * 
+        * @return the same builder
+        */
+       private StringBuilder appendHtml(StringBuilder builder, Story story,
+                       boolean links) {
+               // TODO
+               builder.append("<div class='story'>");
+               if (links) {
+                       builder.append("        <h1><a href='" + escape(story.getId())
+                                       + ".html'>" + escape(story.getTitle()) + "</a></h1>");
+               } else {
+                       builder.append("        <h1>" + escape(story.getTitle()) + "</h1>");
+               }
+               builder.append("        <div class='details'>("
+                               + escape(story.getDetails()) + ")</div>");
+               builder.append("        <br/>");
+               builder.append("        <div class='content'>");
+               builder.append("                " + escape(story.getContent()));
+               builder.append("        </div>");
+               builder.append("</div>");
+
+               return builder;
+       }
+
+       /**
+        * Escape a plain text so it can be inserted in HTML, either as content or
+        * inside a quoted attribute value.
+        * 
+        * @param text
+        *            the plain text, which can be NULL
+        * 
+        * @return the escaped text (an empty text for NULL)
+        */
+       private static String escape(String text) {
+               if (text == null) {
+                       return "";
+               }
+
+               StringBuilder escaped = new StringBuilder(text.length());
+               for (int i = 0; i < text.length(); i++) {
+                       char car = text.charAt(i);
+                       switch (car) {
+                       case '&':
+                               escaped.append("&amp;");
+                               break;
+                       case '<':
+                               escaped.append("&lt;");
+                               break;
+                       case '>':
+                               escaped.append("&gt;");
+                               break;
+                       case '"':
+                               escaped.append("&quot;");
+                               break;
+                       case '\'':
+                               escaped.append("&#39;");
+                               break;
+                       default:
+                               escaped.append(car);
+                               break;
+                       }
+               }
+
+               return escaped.toString();
+       }
+}
diff --git a/src/be/nikiroo/gofetch/output/Output.java b/src/be/nikiroo/gofetch/output/Output.java
new file mode 100644 (file)
index 0000000..b12596e
--- /dev/null
@@ -0,0 +1,65 @@
+package be.nikiroo.gofetch.output;
+
+import java.util.List;
+
+import be.nikiroo.gofetch.data.Comment;
+import be.nikiroo.gofetch.data.Story;
+import be.nikiroo.gofetch.support.BasicSupport.Type;
+
+/**
+ * Common contract of the output formats: each implementation turns stories
+ * and comments into text.
+ * 
+ * @author niki
+ */
+public abstract class Output {
+       /**
+        * The type of source this output works for (NULL means no type).
+        */
+       protected Type type;
+
+       /**
+        * Create a new {@link Output} for the given type of source.
+        * 
+        * @param type
+        *            the type of source, or NULL for no type
+        */
+       public Output(Type type) {
+               this.type = type;
+       }
+
+       /**
+        * The text to write at the start of the index file.
+        * 
+        * @return the header
+        */
+       public abstract String getIndexHeader();
+
+       /**
+        * The text to write at the end of the index file.
+        * 
+        * @return the footer
+        */
+       public abstract String getIndexFooter();
+
+       /**
+        * Export the summary of a story.
+        * 
+        * @param story
+        *            the story to export
+        * 
+        * @return the exported summary
+        */
+       public abstract String export(Story story);
+
+       /**
+        * Export a story in full, followed by its comments.
+        * 
+        * @param story
+        *            the story to export
+        * @param comments
+        *            the comments of this story
+        * 
+        * @return the exported story
+        */
+       public abstract String export(Story story, List<Comment> comments);
+}
diff --git a/src/be/nikiroo/gofetch/support/BasicSupport.java b/src/be/nikiroo/gofetch/support/BasicSupport.java
new file mode 100644 (file)
index 0000000..1105df5
--- /dev/null
@@ -0,0 +1,81 @@
+package be.nikiroo.gofetch.support;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.net.URLConnection;
+import java.util.List;
+import java.util.zip.GZIPInputStream;
+
+import be.nikiroo.gofetch.data.Comment;
+import be.nikiroo.gofetch.data.Story;
+
+/**
+ * Base class of the supported news sources.
+ * <p>
+ * Use {@link BasicSupport#getSupport(Type)} to get the implementation that
+ * goes with a given {@link Type}.
+ */
+public abstract class BasicSupport {
+       /**
+        * The supported news sources.
+        */
+       public enum Type {
+               SLASHDOT,
+       }
+
+       static private String preselector;
+
+       private Type type;
+
+       /**
+        * List the stories of this source.
+        * 
+        * @return the stories
+        * 
+        * @throws IOException
+        *             in case of I/O error
+        */
+       public abstract List<Story> list() throws IOException;
+
+       /**
+        * Get the comments of the given story.
+        * 
+        * @param story
+        *            the story
+        * 
+        * @return the comments
+        * 
+        * @throws IOException
+        *             in case of I/O error
+        */
+       public abstract List<Comment> getComments(Story story) throws IOException;
+
+       /**
+        * @return the description of this source
+        */
+       public abstract String getDescription();
+
+       /**
+        * The selector of this source, see {@link #getSelector(Type)}.
+        * 
+        * @return the selector
+        */
+       public String getSelector() {
+               return getSelector(type);
+       }
+
+       /**
+        * @return the type
+        */
+       public Type getType() {
+               return type;
+       }
+
+       /**
+        * @param type
+        *            the type to set
+        */
+       protected void setType(Type type) {
+               this.type = type;
+       }
+
+       /**
+        * @param preselector
+        *            the preselector to set
+        */
+       public static void setPreselector(String preselector) {
+               BasicSupport.preselector = preselector;
+       }
+
+       /**
+        * Create the support that goes with the given type, and set its type.
+        * 
+        * @param type
+        *            the type, which can be NULL
+        * 
+        * @return the support, or NULL if there is none for this type
+        */
+       public static BasicSupport getSupport(Type type) {
+               if (type == null) {
+                       return null;
+               }
+
+               BasicSupport support;
+               switch (type) {
+               case SLASHDOT:
+                       support = new Slashdot();
+                       break;
+               default:
+                       support = null;
+                       break;
+               }
+
+               if (support != null) {
+                       support.setType(type);
+               }
+
+               return support;
+       }
+
+       /**
+        * The selector of the given type: the preselector, then the type between
+        * two "/".
+        * 
+        * @param type
+        *            the type
+        * 
+        * @return the selector
+        */
+       public static String getSelector(Type type) {
+               StringBuilder selector = new StringBuilder();
+               selector.append(preselector).append("/").append(type).append("/");
+               return selector.toString();
+       }
+
+       // TODO: check Downloader.java?
+       /**
+        * Open the given URL for reading; the content is decompressed on the fly
+        * when the declared content encoding is "gzip".
+        * 
+        * @param url
+        *            the URL to open
+        * 
+        * @return the stream to read the content from
+        * 
+        * @throws IOException
+        *             in case of I/O error
+        */
+       protected static InputStream open(URL url) throws IOException {
+               URLConnection connection = url.openConnection();
+               connection.connect();
+
+               InputStream raw = connection.getInputStream();
+               boolean gzipped = "gzip".equals(connection.getContentEncoding());
+
+               return gzipped ? new GZIPInputStream(raw) : raw;
+       }
+}
diff --git a/src/be/nikiroo/gofetch/support/Slashdot.java b/src/be/nikiroo/gofetch/support/Slashdot.java
new file mode 100644 (file)
index 0000000..5dfa03b
--- /dev/null
@@ -0,0 +1,126 @@
+package be.nikiroo.gofetch.support;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.jsoup.helper.DataUtil;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import be.nikiroo.gofetch.data.Comment;
+import be.nikiroo.gofetch.data.Story;
+
+/**
+ * Support for Slashdot: the stories are read from the front page, the
+ * comments from the page of each story.
+ */
+public class Slashdot extends BasicSupport {
+       @Override
+       public String getDescription() {
+               return "Slashdot: News for nerds, stuff that matters!";
+       }
+
+       @Override
+       public List<Story> list() throws IOException {
+               List<Story> list = new ArrayList<Story>();
+
+               Document doc = load(new URL("https://slashdot.org/"));
+               Elements stories = doc.getElementsByTag("header");
+               for (Element story : stories) {
+                       // Only the headers that contain a story title are stories
+                       Elements titles = story.getElementsByClass("story-title");
+                       if (titles.size() == 0) {
+                               continue;
+                       }
+                       Element title = titles.get(0);
+
+                       String id = "" + title.attr("id");
+                       if (id.startsWith("title-")) {
+                               id = id.substring("title-".length());
+                       }
+
+                       // First link: the story on the site; second link (if any): the
+                       // external source
+                       Elements links = title.getElementsByTag("a");
+                       String intUrl = null;
+                       String extUrl = null;
+                       if (links.size() > 0) {
+                               intUrl = links.get(0).absUrl("href");
+                       }
+                       if (links.size() > 1) {
+                               extUrl = links.get(1).absUrl("href");
+                       }
+
+                       String details = "";
+                       Elements detailsElements = story.getElementsByClass("details");
+                       if (detailsElements.size() > 0) {
+                               details = detailsElements.get(0).text();
+                       }
+
+                       // The body is not inside the header, it is looked up by ID in
+                       // the whole document
+                       String body = "";
+                       Element bodyElement = doc.getElementById("text-" + id);
+                       if (bodyElement != null) {
+                               body = bodyElement.text();
+                       }
+
+                       list.add(new Story(getType(), id, title.text(), details, intUrl,
+                                       extUrl, body));
+               }
+
+               return list;
+       }
+
+       @Override
+       public List<Comment> getComments(Story story) throws IOException {
+               List<Comment> comments = new ArrayList<Comment>();
+
+               Document doc = load(new URL(story.getUrlInternal()));
+               Element listing = doc.getElementById("commentlisting");
+               if (listing != null) {
+                       comments.addAll(getComments(listing));
+               }
+
+               return comments;
+       }
+
+       /**
+        * Download and parse the page at the given URL; the stream used to
+        * download it is always closed before returning.
+        * 
+        * @param url
+        *            the URL of the page
+        * 
+        * @return the parsed page
+        * 
+        * @throws IOException
+        *             in case of I/O error
+        */
+       private Document load(URL url) throws IOException {
+               InputStream in = open(url);
+               try {
+                       return DataUtil.load(in, "UTF-8", url.toString());
+               } finally {
+                       in.close();
+               }
+       }
+
+       /**
+        * Get the non-empty comments found in the direct children of the given
+        * element (only the children with the class "comment" are used).
+        * 
+        * @param listing
+        *            the element that contains the comments
+        * 
+        * @return the comments
+        */
+       private List<Comment> getComments(Element listing) {
+               List<Comment> comments = new ArrayList<Comment>();
+               for (Element commentElement : listing.children()) {
+                       if (commentElement.hasClass("comment")) {
+                               Comment comment = getComment(commentElement);
+                               if (!comment.isEmpty()) {
+                                       comments.add(comment);
+                               }
+                       }
+               }
+               return comments;
+       }
+
+       /**
+        * Convert the given element into a {@link Comment}, including the
+        * comments found in its direct children whose ID contains "commtree_".
+        * 
+        * @param commentElement
+        *            the element to convert
+        * 
+        * @return the comment
+        */
+       private Comment getComment(Element commentElement) {
+               String title = firstOrEmpty(commentElement, "title");
+               String author = firstOrEmpty(commentElement, "by");
+               String content = firstOrEmpty(commentElement, "commentBody");
+               String date = firstOrEmpty(commentElement, "otherdetails");
+
+               Comment comment = new Comment(commentElement.id(), author, title, date,
+                               content);
+
+               for (Element child : commentElement.children()) {
+                       if (child.id().contains("commtree_")) {
+                               comment.addAll(getComments(child));
+                       }
+               }
+
+               return comment;
+       }
+
+       /**
+        * Get the text of the first element with the given class found in the
+        * given element.
+        * 
+        * @param element
+        *            the element to look into
+        * @param className
+        *            the class to look for
+        * 
+        * @return the text, or an empty text if no such element exists
+        */
+       private String firstOrEmpty(Element element, String className) {
+               Elements subElements = element.getElementsByClass(className);
+               if (subElements.size() > 0) {
+                       return subElements.get(0).text();
+               }
+
+               return "";
+       }
+}