From: Niki Roo Date: Sat, 5 Aug 2017 21:53:59 +0000 (+0200) Subject: First version (slashdot supported) X-Git-Url: https://git.nikiroo.be/?a=commitdiff_plain;h=737852686d8897331706ed4b902dbd9d5038cb53;p=gofetch.git First version (slashdot supported) --- diff --git a/Makefile.base b/Makefile.base new file mode 100644 index 0000000..3304c83 --- /dev/null +++ b/Makefile.base @@ -0,0 +1,163 @@ +# Required parameters (the commented out ones are supposed to change per project): + +#MAIN = path to main java source to compile +#MORE = path to supplementary needed resources not linked from MAIN +#NAME = name of project (used for jar output file) +#PREFIX = usually /usr/local (where to install the program) +#TEST = path to main test source to compile +#JAR_FLAGS += a list of things to pack, each usually prefixed with "-C bin/" +#SJAR_FLAGS += a list of things to pack, each usually prefixed with "-C src/", for *-sources.jar files +#TEST_PARAMS = any parameter to pass to the test runnable when "test-run" + +JAVAC = javac +JAVAC_FLAGS += -encoding UTF-8 -d ./bin/ -cp ./src/ +JAVA = java +JAVA_FLAGS += -cp ./bin/ +JAR = jar +RJAR = java +RJAR_FLAGS += -jar + +# Usual options: +# make : to build the jar file +# make libs : to update the libraries into src/ +# make build : to update the binaries (not the jar) +# make test : to update the test binaries +# make build jar : to update the binaries and jar file +# make clean : to clean the directory of intermediate files +# make mrpropre : to clean the directory of all outputs +# make run : to run the program from the binaries +# make run-test : to run the test program from the binaries +# make jrun : to run the program from the jar file +# make install : to install the application into $PREFIX + +# Note: build is actually slower than rebuild in most cases except when +# small changes only are detected ; so we use rebuild by default + +all: build jar + +.PHONY: all clean mrproper mrpropre build run jrun jar resources test-resources 
install libs love + +bin: + @mkdir -p bin + +jar: $(NAME).jar + +build: resources + @echo Compiling program... + @echo " src/$(MAIN)" + @$(JAVAC) $(JAVAC_FLAGS) "src/$(MAIN).java" + @[ "$(MORE)" = "" ] || for sup in $(MORE); do \ + echo " src/$$sup" ;\ + $(JAVAC) $(JAVAC_FLAGS) "src/$$sup.java" ; \ + done + +test: test-resources + @[ -e bin/$(MAIN).class ] || echo You need to build the sources + @[ -e bin/$(MAIN).class ] + @echo Compiling test program... + @[ "$(TEST)" != "" ] || echo No test sources defined. + @[ "$(TEST)" = "" ] || for sup in $(TEST); do \ + echo " src/$$sup" ;\ + $(JAVAC) $(JAVAC_FLAGS) "src/$$sup.java" ; \ + done + +clean: + rm -rf bin/ + @echo Removing sources taken from libs... + @for lib in libs/*-sources.jar libs/*-sources.patch.jar; do \ + if [ "$$lib" != 'libs/*-sources.jar' -a "$$lib" != 'libs/*-sources.patch.jar' ]; then \ + basename "$$lib"; \ + jar tf "$$lib" | while read -r ln; do \ + [ -f "src/$$ln" ] && rm "src/$$ln"; \ + done; \ + jar tf "$$lib" | tac | while read -r ln; do \ + [ -d "src/$$ln" ] && rmdir "src/$$ln" 2>/dev/null || true; \ + done; \ + fi \ + done + +mrproper: mrpropre + +mrpropre: clean + rm -f $(NAME).jar + rm -f $(NAME)-sources.jar + [ ! -e VERSION ] || rm -f "$(NAME)-`cat VERSION`.jar" + [ ! -e VERSION ] || rm -f "$(NAME)-`cat VERSION`-sources.jar" + +love: + @echo " ...not war." + +resources: libs + @echo Copying resources into bin/... + @cd src && find . | grep -v '\.java$$' | grep -v '/test/' | while read -r ln; do \ + if [ -f "$$ln" ]; then \ + dir="`dirname "$$ln"`"; \ + mkdir -p "../bin/$$dir" ; \ + cp "$$ln" "../bin/$$ln" ; \ + fi ; \ + done + @cp VERSION bin/ + +test-resources: resources + @echo Copying test resources into bin/... + @cd src && find . | grep -v '\.java$$' | grep '/test/' | while read -r ln; do \ + if [ -f "$$ln" ]; then \ + dir="`dirname "$$ln"`"; \ + mkdir -p "../bin/$$dir" ; \ + cp "$$ln" "../bin/$$ln" ; \ + fi ; \ + done + +libs: bin + @[ -e bin/libs -o ! 
-d libs ] || echo Extracting sources from libs... + @[ -e bin/libs -o ! -d libs ] || (cd src && for lib in ../libs/*-sources.jar ../libs/*-sources.patch.jar; do \ + if [ "$$lib" != '../libs/*-sources.jar' -a "$$lib" != '../libs/*-sources.patch.jar' ]; then \ + basename "$$lib"; \ + jar xf "$$lib"; \ + fi \ + done ) + @[ ! -d libs ] || touch bin/libs + +$(NAME).jar: resources + @[ -e bin/$(MAIN).class ] || echo You need to build the sources + @[ -e bin/$(MAIN).class ] + @echo Making JAR file... + @echo > bin/manifest + @[ "$(SJAR_FLAGS)" = "" ] || echo Creating $(NAME)-sources.jar... + @[ "$(SJAR_FLAGS)" = "" ] || $(JAR) cfm $(NAME)-sources.jar bin/manifest $(SJAR_FLAGS) + @[ "$(SJAR_FLAGS)" = "" ] || [ ! -e VERSION ] || echo Copying to "$(NAME)-`cat VERSION`-sources.jar"... + @[ "$(SJAR_FLAGS)" = "" ] || [ ! -e VERSION ] || cp $(NAME)-sources.jar "$(NAME)-`cat VERSION`-sources.jar" + @echo "Main-Class: `echo "$(MAIN)" | sed 's:/:.:g'`" > bin/manifest + @echo >> bin/manifest + $(JAR) cfm $(NAME).jar bin/manifest $(JAR_FLAGS) + @[ ! -e VERSION ] || echo Copying to "$(NAME)-`cat VERSION`.jar"... + @[ ! -e VERSION ] || cp $(NAME).jar "$(NAME)-`cat VERSION`.jar" + +run: + @[ -e bin/$(MAIN).class ] || echo You need to build the sources + @[ -e bin/$(MAIN).class ] + @echo Running "$(NAME)"... + $(JAVA) $(JAVA_FLAGS) $(MAIN) + +jrun: + @[ -e $(NAME).jar ] || echo You need to build the jar + @[ -e $(NAME).jar ] + @echo Running "$(NAME).jar"... + $(RJAR) $(RJAR_FLAGS) $(NAME).jar + +run-test: + @[ "$(TEST)" = "" -o -e "bin/$(TEST).class" ] || echo You need to build the test sources + @[ "$(TEST)" = "" -o -e "bin/$(TEST).class" ] + @echo Running tests for "$(NAME)"... + @[ "$(TEST)" != "" ] || echo No test sources defined. 
+ [ "$(TEST)" = "" ] || ( clear ; $(JAVA) $(JAVA_FLAGS) $(TEST) $(TEST_PARAMS) ) + +install: + @[ -e $(NAME).jar ] || echo You need to build the jar + @[ -e $(NAME).jar ] + mkdir -p "$(PREFIX)/lib" "$(PREFIX)/bin" + cp $(NAME).jar "$(PREFIX)/lib/" + echo "#!/bin/sh" > "$(PREFIX)/bin/$(NAME)" + echo "$(RJAR) $(RJAR_FLAGS) \"$(PREFIX)/lib/$(NAME).jar\" \"\$$@\"" >> "$(PREFIX)/bin/$(NAME)" + chmod a+rx "$(PREFIX)/bin/$(NAME)" + diff --git a/README.md b/README.md new file mode 100644 index 0000000..d7feec7 --- /dev/null +++ b/README.md @@ -0,0 +1,39 @@ +# GoFetch + +GoFetch is a simple web scraper that outputs gopher-ready files. +You point it to your gopher directory, you launch it, and you have a +gopher view of the supported news sites. + +## Supported websites + +- Slashdot: News for nerds, stuff that matters! + +## Supported platforms + +Any platform with at least Java 1.6 on it should be ok. + +## Usage + +```java -jar gofetch.jar [dir] [selector] [type] [max] [hostname] [port]``` + +- dir: the target directory where to store the files +- selector: the gopher selector to prepend (also a sub-directory in [dir]) +- max: the maximum number of stories to show on the main page +- hostname: the gopher hostname +- port: the gopher port + +## Compilation + +```./configure.sh && make``` + +You can also import the java sources into, say, [Eclipse](https://eclipse.org/), and create a runnable JAR file from there. + +### Dependent libraries (included) + +- libs/nikiroo-utils-sources.jar: some shared utility functions I also use elsewhere +- [libs/jsoup-sources.jar](https://jsoup.org/): a nice library to parse HTML + +Nothing else but Java 1.6+. + +Note that calling ```make libs``` will export the libraries into the src/ directory. 
+ diff --git a/VERSION b/VERSION new file mode 100644 index 0000000..8acdd82 --- /dev/null +++ b/VERSION @@ -0,0 +1 @@ +0.0.1 diff --git a/changelog.md b/changelog.md new file mode 100644 index 0000000..81e28f3 --- /dev/null +++ b/changelog.md @@ -0,0 +1,7 @@ +# Gofetch + +## Version 0.0.1 + +- First version +- Slashdot supported + diff --git a/configure.sh b/configure.sh new file mode 100755 index 0000000..b2317b2 --- /dev/null +++ b/configure.sh @@ -0,0 +1,68 @@ +#!/bin/sh + +# default: +PREFIX=/usr/local +PROGS="java javac jar make sed" + +valid=true +while [ "$*" != "" ]; do + key=`echo "$1" | cut -f1 -d=` + val=`echo "$1" | cut -f2 -d=` + case "$key" in + --) + ;; + --help) # This help message + echo The following arguments can be used: + cat "$0" | grep '^\s*--' | grep '#' | while read ln; do + cmd=`echo "$ln" | cut -f1 -d')'` + msg=`echo "$ln" | cut -f2 -d'#'` + echo " $cmd$msg" + done + ;; + --prefix) #=PATH Change the prefix to the given path + PREFIX="$val" + ;; + *) + echo "Unsupported parameter: '$1'" >&2 + echo >&2 + sh "$0" --help >&2 + valid=false + ;; + esac + shift +done + +[ $valid = false ] && exit 1 + +MESS="A required program cannot be found:" +for prog in $PROGS; do + out="`whereis -b "$prog" 2>/dev/null`" + if [ "$out" = "$prog:" ]; then + echo "$MESS $prog" >&2 + valid=false + fi +done + +[ $valid = false ] && exit 2 + +if [ "`whereis tput`" = "tput:" ]; then + ok='"[ ok ]"'; + ko='"[ !! ]"'; + cols=80; +else + ok='"`tput bold`[`tput setf 2` OK `tput init``tput bold`]`tput init`"'; + ko='"`tput bold`[`tput setf 4` !! 
`tput init``tput bold`]`tput init`"'; + cols='"`tput cols`"'; +fi; + +echo "MAIN = be/nikiroo/gofetch/Main" > Makefile +echo "MORE = " >> Makefile +echo "TEST = " >> Makefile +echo "TEST_PARAMS = $cols $ok $ko" >> Makefile +echo "NAME = gofetch" >> Makefile +echo "PREFIX = $PREFIX" >> Makefile +echo "JAR_FLAGS += -C bin/ org -C bin/ be -C bin/ VERSION" >> Makefile +#echo "SJAR_FLAGS += -C src/ org -C src/ jexer -C src/ be -C ./ LICENSE -C ./ README.md -C ./ VERSION" >> Makefile + +cat Makefile.base >> Makefile + diff --git a/libs/jsoup-1.10.3-sources.jar b/libs/jsoup-1.10.3-sources.jar new file mode 100644 index 0000000..1fe0db4 Binary files /dev/null and b/libs/jsoup-1.10.3-sources.jar differ diff --git a/libs/nikiroo-utils-2.1.0-sources.jar b/libs/nikiroo-utils-2.1.0-sources.jar new file mode 100644 index 0000000..e7975f4 Binary files /dev/null and b/libs/nikiroo-utils-2.1.0-sources.jar differ diff --git a/src/be/nikiroo/gofetch/Fetcher.java b/src/be/nikiroo/gofetch/Fetcher.java new file mode 100644 index 0000000..5a9c774 --- /dev/null +++ b/src/be/nikiroo/gofetch/Fetcher.java @@ -0,0 +1,199 @@ +package be.nikiroo.gofetch; + +import java.io.File; +import java.io.FileWriter; +import java.io.FilenameFilter; +import java.io.IOException; +import java.util.Arrays; +import java.util.List; + +import be.nikiroo.gofetch.data.Comment; +import be.nikiroo.gofetch.data.Story; +import be.nikiroo.gofetch.output.Gopher; +import be.nikiroo.gofetch.output.Html; +import be.nikiroo.gofetch.output.Output; +import be.nikiroo.gofetch.support.BasicSupport; +import be.nikiroo.gofetch.support.BasicSupport.Type; +import be.nikiroo.utils.IOUtils; + +/** + * The class that will manage the fetch operations. + *

+ * It will scrap the required websites and process them to disk. + * + * @author niki + */ +public class Fetcher { + private File dir; + private String preselector; + private int maxStories; + private String hostname; + private int port; + private Type type; + + /** + * Prepare a new {@link Fetcher}. + * + * @param dir + * the target directory where to save the files (won't have + * impact on the files' content) + * @param preselector + * the sub directory and (pre-)selector to use for the resources + * (will have an impact on the files' content) + * @param type + * the type of news to get (or the special keyword ALL to get all + * of the supported sources) + * @param maxStories + * the maximum number of stories to show on the resume page + * @param hostname + * the gopher host to use (will have an impact on the + * files' content) + * @param port + * the gopher port to use (will have an impact on the + * files' content) + */ + public Fetcher(File dir, String preselector, Type type, int maxStories, + String hostname, int port) { + this.dir = dir; + this.preselector = preselector; + this.type = type; + this.maxStories = maxStories; + this.hostname = hostname; + this.port = port; + } + + /** + * Start the fetching operation. + *

+ * This method will handle the main pages itself, and will call + * {@link Fetcher#list(BasicSupport)} for the stories. + * + * @throws IOException + * in case of I/O error + */ + public void start() throws IOException { + File cache = new File(dir, preselector); + cache.mkdirs(); + File cacheHtml = new File(cache, "index.html"); + cache = new File(cache, ".cache"); + + Output gopher = new Gopher(null, hostname, port); + Output html = new Html(null); + + FileWriter writer = new FileWriter(cache); + try { + FileWriter writerHtml = new FileWriter(cacheHtml); + try { + writer.append(gopher.getIndexHeader()); + writerHtml.append(html.getIndexHeader()); + + Type types[]; + if (type == null) { + types = Type.values(); + } else { + types = new Type[] { type }; + } + + BasicSupport.setPreselector(preselector); + for (Type type : types) { + BasicSupport support = BasicSupport.getSupport(type); + list(support); + + writer.append("1" + support.getDescription()).append("\t") + .append("1" + support.getSelector()) // + .append("\t").append(hostname) // + .append("\t").append(Integer.toString(port)) // + .append("\r\n"); + String ref = support.getSelector(); + while (ref.startsWith("/")) { + ref = ref.substring(1); + } + writerHtml.append("

" + support.getDescription() + "
"); + } + + writer.append(gopher.getIndexFooter()); + writerHtml.append(html.getIndexFooter()); + } finally { + writerHtml.close(); + } + } finally { + writer.close(); + } + } + + /** + * Process the stories for the given {@link BasicSupport} to disk. + * + * @param support + * the {@link BasicSupport} to download from + * + * @throws IOException + * in case of I/O error + **/ + private void list(BasicSupport support) throws IOException { + Output gopher = new Gopher(support.getType(), hostname, port); + Output html = new Html(support.getType()); + + new File(dir, support.getSelector()).mkdirs(); + + System.err + .print("Listing recent news for " + support.getType() + "..."); + List stories = support.list(); + System.err.println(" " + stories.size() + " stories found!"); + int i = 1; + for (Story story : stories) { + IOUtils.writeSmallFile(dir, story.getSelector() + ".header", + gopher.export(story)); + IOUtils.writeSmallFile(dir, story.getSelector() + ".header.html", + html.export(story)); + + System.err.println(String.format("%02d/%02d", i, stories.size()) + + " Fetching comments for story " + story.getId() + "..."); + List comments = support.getComments(story); + + IOUtils.writeSmallFile(dir, story.getSelector(), + gopher.export(story, comments)); + IOUtils.writeSmallFile(dir, story.getSelector() + ".html", + html.export(story, comments)); + + i++; + } + + File varDir = new File(dir, support.getSelector()); + String[] headers = varDir.list(new FilenameFilter() { + @Override + public boolean accept(File dir, String name) { + return name.endsWith(".header"); + } + }); + + File cache = new File(varDir, ".cache"); + File cacheHtml = new File(varDir, "index.html"); + FileWriter writer = new FileWriter(cache); + try { + FileWriter writerHtml = new FileWriter(cacheHtml); + try { + if (headers.length > 0) { + Arrays.sort(headers); + int from = headers.length - 1; + int to = headers.length - maxStories; + if (to < 0) { + to = 0; + } + for (i = from; i >= to; i--) { + 
writer.append(IOUtils.readSmallFile(new File(varDir, + headers[i]))); + + writerHtml.append(IOUtils.readSmallFile(new File( + varDir, headers[i] + ".html"))); + } + } + } finally { + writerHtml.close(); + } + } finally { + writer.close(); + } + } +} diff --git a/src/be/nikiroo/gofetch/Main.java b/src/be/nikiroo/gofetch/Main.java new file mode 100644 index 0000000..590d4a0 --- /dev/null +++ b/src/be/nikiroo/gofetch/Main.java @@ -0,0 +1,105 @@ +package be.nikiroo.gofetch; + +import java.io.File; +import java.io.IOException; + +import be.nikiroo.gofetch.support.BasicSupport.Type; + +/** + * This class is tha main entry point of the program. It will parse the + * arguments, checks them (and warn-and-exit if they are invalid) then call + * {@link Fetcher#start()}. + * + * @author niki + */ +public class Main { + /** + * Main entry point. + * + * @param args + * save-to-dir selector-subdir type max hostname port + * + * @throws IOException + * in case of I/O error + */ + public static void main(String[] args) throws IOException { + if (args.length < 6) { + System.err + .println("Syntax error: gofecth [target dir] [selector] [type or 'ALL'] [max stories] [hostname] [port]"); + System.exit(1); + } + + String dirStr = args[0]; + String preselectorStr = args[1]; + String typeStr = args[2]; + String maxStoriesStr = args[3]; + String hostnameStr = args[4]; + String portStr = args[5]; + + // Dir + File dir = new File(dirStr); + dir.mkdirs(); + + if (!dir.exists()) { + System.err.println("Cannot open/create the root directory: " + + dirStr); + System.exit(1); + } + + if (dir.isFile()) { + System.err + .println("Root directory exists and is a file: " + dirStr); + System.exit(1); + } + + // Selector base (empty is ok, DO NOT end with /) + String preselector = ""; + if (preselectorStr != null && !preselectorStr.startsWith("/")) { + preselector = "/" + preselectorStr; + } + while (preselector.endsWith("/")) { + preselector = preselector.substring(0, preselector.length() - 1); + } + 
+ // Type to download + Type type = null; + if (!"ALL".equals(typeStr)) { + try { + Type.valueOf(typeStr.toUpperCase()); + } catch (IllegalArgumentException e) { + System.err.println("Invalid type: " + typeStr); + System.exit(1); + } + } + + // Max number of stories to display in the cache + int maxStories = 0; + try { + maxStories = Integer.parseInt(maxStoriesStr); + } catch (NumberFormatException e) { + System.err + .println("The maximum number of stories cannot be parsed: " + + maxStoriesStr); + System.exit(1); + } + + // + String hostname = hostnameStr; + + // + int port = 0; + try { + port = Integer.parseInt(portStr); + } catch (NumberFormatException e) { + System.err.println("The port cannot be parsed: " + portStr); + System.exit(1); + } + + if (port < 0 || port > 65535) { + System.err.println("Invalid port number: " + portStr); + System.exit(1); + } + + new Fetcher(dir, preselector, type, maxStories, hostname, port).start(); + } +} \ No newline at end of file diff --git a/src/be/nikiroo/gofetch/StringJustifier.java b/src/be/nikiroo/gofetch/StringJustifier.java new file mode 100644 index 0000000..e9e6579 --- /dev/null +++ b/src/be/nikiroo/gofetch/StringJustifier.java @@ -0,0 +1,244 @@ +/* + * This file was taken from: + * Jexer - Java Text User Interface + * + * The MIT License (MIT) + * + * Copyright (C) 2017 Kevin Lamonte + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * @author Kevin Lamonte [kevin.lamonte@gmail.com] + * @version 1 + */ +package be.nikiroo.gofetch; + +import java.util.LinkedList; +import java.util.List; + +/** + * StringJustifier contains methods to convert one or more long lines of strings + * into justified text paragraphs. + */ +public class StringJustifier { + + /** + * Left-justify a string into a list of lines. + * + * @param str + * the string + * @param n + * the maximum number of characters in a line + * @return the list of lines + */ + public static List left(final String str, final int n) { + List result = new LinkedList(); + + /* + * General procedure: + * + * 1. Split on '\n' into paragraphs. + * + * 2. Scan each line, noting the position of the last + * beginning-of-a-word. + * + * 3. Chop at the last #2 if the next beginning-of-a-word exceeds n. + * + * 4. Return the lines. + */ + + String[] rawLines = str.split("\n"); + for (int i = 0; i < rawLines.length; i++) { + StringBuilder line = new StringBuilder(); + StringBuilder word = new StringBuilder(); + boolean inWord = false; + for (int j = 0; j < rawLines[i].length(); j++) { + char ch = rawLines[i].charAt(j); + if ((ch == ' ') || (ch == '\t')) { + if (inWord == true) { + // We have just transitioned from a word to + // whitespace. See if we have enough space to add + // the word to the line. + if (word.length() + line.length() > n) { + // This word will exceed the line length. Wrap + // at it instead. 
+ result.add(line.toString()); + line = new StringBuilder(); + } + if ((word.toString().startsWith(" ")) + && (line.length() == 0)) { + line.append(word.substring(1)); + } else { + line.append(word); + } + word = new StringBuilder(); + word.append(ch); + inWord = false; + } else { + // We are in the whitespace before another word. Do + // nothing. + } + } else { + if (inWord == true) { + // We are appending to a word. + word.append(ch); + } else { + // We have transitioned from whitespace to a word. + word.append(ch); + inWord = true; + } + } + } // for (int j = 0; j < rawLines[i].length(); j++) + + if (word.length() + line.length() > n) { + // This word will exceed the line length. Wrap at it + // instead. + result.add(line.toString()); + line = new StringBuilder(); + } + if ((word.toString().startsWith(" ")) && (line.length() == 0)) { + line.append(word.substring(1)); + } else { + line.append(word); + } + result.add(line.toString()); + } // for (int i = 0; i < rawLines.length; i++) { + + return result; + } + + /** + * Right-justify a string into a list of lines. + * + * @param str + * the string + * @param n + * the maximum number of characters in a line + * @return the list of lines + */ + public static List right(final String str, final int n) { + List result = new LinkedList(); + + /* + * Same as left(), but preceed each line with spaces to make it n chars + * long. + */ + List lines = left(str, n); + for (String line : lines) { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < n - line.length(); i++) { + sb.append(' '); + } + sb.append(line); + result.add(sb.toString()); + } + + return result; + } + + /** + * Center a string into a list of lines. 
+ * + * @param str + * the string + * @param n + * the maximum number of characters in a line + * @return the list of lines + */ + public static List center(final String str, final int n) { + List result = new LinkedList(); + + /* + * Same as left(), but preceed/succeed each line with spaces to make it + * n chars long. + */ + List lines = left(str, n); + for (String line : lines) { + StringBuilder sb = new StringBuilder(); + int l = (n - line.length()) / 2; + int r = n - line.length() - l; + for (int i = 0; i < l; i++) { + sb.append(' '); + } + sb.append(line); + for (int i = 0; i < r; i++) { + sb.append(' '); + } + result.add(sb.toString()); + } + + return result; + } + + /** + * Fully-justify a string into a list of lines. + * + * @param str + * the string + * @param n + * the maximum number of characters in a line + * @return the list of lines + */ + public static List full(final String str, final int n) { + List result = new LinkedList(); + + /* + * Same as left(), but insert spaces between words to make each line n + * chars long. The "algorithm" here is pretty dumb: it performs a split + * on space and then re-inserts multiples of n between words. 
+ */ + List lines = left(str, n); + for (int lineI = 0; lineI < lines.size() - 1; lineI++) { + String line = lines.get(lineI); + String[] words = line.split(" "); + if (words.length > 1) { + int charCount = 0; + for (int i = 0; i < words.length; i++) { + charCount += words[i].length(); + } + int spaceCount = n - charCount; + int q = spaceCount / (words.length - 1); + int r = spaceCount % (words.length - 1); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < words.length - 1; i++) { + sb.append(words[i]); + for (int j = 0; j < q; j++) { + sb.append(' '); + } + if (r > 0) { + sb.append(' '); + r--; + } + } + for (int j = 0; j < r; j++) { + sb.append(' '); + } + sb.append(words[words.length - 1]); + result.add(sb.toString()); + } else { + result.add(line); + } + } + if (lines.size() > 0) { + result.add(lines.get(lines.size() - 1)); + } + + return result; + } +} diff --git a/src/be/nikiroo/gofetch/data/Comment.java b/src/be/nikiroo/gofetch/data/Comment.java new file mode 100644 index 0000000..44c0de1 --- /dev/null +++ b/src/be/nikiroo/gofetch/data/Comment.java @@ -0,0 +1,77 @@ +package be.nikiroo.gofetch.data; + +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +public class Comment implements Iterable { + private String id; + private String author; + private String title; + private String date; + private String content; + private List children; + + public Comment(String id, String author, String title, String date, + String content) { + this.id = id; + this.author = author; + this.title = title; + this.date = date; + this.content = content; + this.children = new ArrayList(); + } + + public void add(Comment comment) { + children.add(comment); + } + + public void addAll(List comments) { + children.addAll(comments); + } + + /** + * @return the id + */ + public String getId() { + return id; + } + + /** + * @return the author + */ + public String getAuthor() { + return author; + } + + /** + * @return the title + */ + public 
String getTitle() { + return title; + } + + /** + * @return the date + */ + public String getDate() { + return date; + } + + /** + * @return the content + */ + public String getContent() { + return content; + } + + public boolean isEmpty() { + return children.isEmpty() + && ("" + author + title + content).trim().isEmpty(); + } + + @Override + public Iterator iterator() { + return children.iterator(); + } +} diff --git a/src/be/nikiroo/gofetch/data/Story.java b/src/be/nikiroo/gofetch/data/Story.java new file mode 100644 index 0000000..aa5aecc --- /dev/null +++ b/src/be/nikiroo/gofetch/data/Story.java @@ -0,0 +1,96 @@ +package be.nikiroo.gofetch.data; + +import java.net.URL; + +import be.nikiroo.gofetch.support.BasicSupport; +import be.nikiroo.gofetch.support.BasicSupport.Type; + +/** + * A news story. + * + * @author niki + */ +public class Story { + private Type type; + private String id; + private String title; + private String details; + private String urlInternal; + private String urlExternal; + private String content; + + /** + * Create a news story. 
+ * + * @param type + * the source {@link Type} + * @param id + * the news ID + * @param title + * the news title + * @param details + * some details to add to the title + * @param urlInternal + * the {@link URL} to get this news on the associated news site + * @param urlExternal + * an external {@link URL} that serve as the news' source, if any + * @param content + * the story content + */ + public Story(Type type, String id, String title, String details, + String urlInternal, String urlExternal, String content) { + this.type = type; + this.id = id; + this.title = title; + this.details = details; + this.urlInternal = urlInternal; + this.urlExternal = urlExternal; + this.content = content; + } + + public String getSelector() { + return BasicSupport.getSelector(type) + id; + } + + /** + * @return the id + */ + public String getId() { + return id; + } + + /** + * @return the title + */ + public String getTitle() { + return title; + } + + /** + * @return the details + */ + public String getDetails() { + return details; + } + + /** + * @return the url + */ + public String getUrlInternal() { + return urlInternal; + } + + /** + * @return the urlExternal + */ + public String getUrlExternal() { + return urlExternal; + } + + /** + * @return the body + */ + public String getContent() { + return content; + } +} \ No newline at end of file diff --git a/src/be/nikiroo/gofetch/output/Gopher.java b/src/be/nikiroo/gofetch/output/Gopher.java new file mode 100644 index 0000000..8cce4a5 --- /dev/null +++ b/src/be/nikiroo/gofetch/output/Gopher.java @@ -0,0 +1,131 @@ +package be.nikiroo.gofetch.output; + +import java.util.List; + +import be.nikiroo.gofetch.StringJustifier; +import be.nikiroo.gofetch.data.Comment; +import be.nikiroo.gofetch.data.Story; +import be.nikiroo.gofetch.support.BasicSupport.Type; + +public class Gopher extends Output { + static private final int LINE_SIZE = 70; + + private String hostname; + private int port; + + public Gopher(Type type, String hostname, int 
port) { + super(type); + + this.hostname = hostname; + this.port = port; + } + + @Override + public String getIndexHeader() { + return "iHello world!\r\niThis is my news site.\r\ni\r\n"; + } + + @Override + public String getIndexFooter() { + return ""; + } + + @Override + public String export(Story story) { + return append(new StringBuilder(), story, false).append("i\r\ni\r\n") + .toString(); + } + + @Override + public String export(Story story, List comments) { + StringBuilder builder = new StringBuilder(); + append(builder, story, true); + + builder.append("i\r\n"); + + for (Comment comment : comments) { + append(builder, comment, ""); + } + + builder.append("i\r\n"); + + return builder.toString(); + } + + private StringBuilder append(StringBuilder builder, Comment comment, + String space) { + + if (space.length() > LINE_SIZE - 20) { + space = space.substring(0, LINE_SIZE - 20); + } + + appendLeft(builder, comment.getTitle(), ">> ", " ", space); + appendLeft(builder, "(" + comment.getAuthor() + ")", " ", " ", + space); + + builder.append("i\r\n"); + + appendLeft(builder, comment.getContent(), " ", " ", space); + + builder.append("i\r\n"); + for (Comment subComment : comment) { + append(builder, subComment, space + " "); + builder.append("i\r\n"); + } + + return builder; + } + + private StringBuilder append(StringBuilder builder, Story story, + boolean links) { + if (links) { + appendCenter(builder, story.getTitle(), true); + builder.append("i\r\n"); + appendLeft(builder, story.getDetails(), "", "", " "); + builder.append("i\r\n"); + builder.append("i o News link: ").append(story.getUrlInternal()); + builder.append("i\r\n"); + builder.append("i o Source link: ").append(story.getUrlExternal()); + builder.append("i\r\n"); + } else { + builder.append('1').append(story.getTitle()) // + .append('\t').append("0").append(story.getSelector()) // + .append('\t').append(hostname) // + .append('\t').append(port) // + .append("\r\n"); + appendLeft(builder, story.getDetails(), 
"", "", " "); + } + + builder.append("i\r\n"); + + appendLeft(builder, story.getContent(), "", "", " "); + + builder.append("i\r\n"); + + return builder; + } + + // note: adds "i" + private static void appendCenter(StringBuilder builder, String text, + boolean allCaps) { + if (allCaps) { + text = text.toUpperCase(); + } + + for (String line : StringJustifier.center(text, LINE_SIZE)) { + builder.append("i").append(line).append("\r\n"); + } + } + + // note: adds "i" + private static void appendLeft(StringBuilder builder, String text, + String prependFirst, String prependOthers, String space) { + String prepend = prependFirst; + for (String line : StringJustifier.left(text, + LINE_SIZE - space.length())) { + builder.append("i").append(space).append(prepend).append(line) + .append("\r\n"); + prepend = prependOthers; + } + } +} diff --git a/src/be/nikiroo/gofetch/output/Html.java b/src/be/nikiroo/gofetch/output/Html.java new file mode 100644 index 0000000..ab7489b --- /dev/null +++ b/src/be/nikiroo/gofetch/output/Html.java @@ -0,0 +1,81 @@ +package be.nikiroo.gofetch.output; + +import java.util.List; + +import be.nikiroo.gofetch.data.Comment; +import be.nikiroo.gofetch.data.Story; +import be.nikiroo.gofetch.support.BasicSupport.Type; + +public class Html extends Output { + public Html(Type type) { + super(type); + } + + @Override + public String getIndexHeader() { + return "

Hello world!


TODO\n"; + } + + @Override + public String getIndexFooter() { + return ""; + } + + @Override + public String export(Story story) { + return appendHtml(new StringBuilder(), story, true).append("
\n") + .toString(); + } + + @Override + public String export(Story story, List comments) { + StringBuilder builder = new StringBuilder(); + appendHtml(builder, story, false); + + // TODO: ext link and link + + builder.append("
"); + for (Comment comment : comments) { + appendHtml(builder, comment, " "); + } + + return builder.toString(); + } + + private void appendHtml(StringBuilder builder, Comment comment, String space) { + builder.append(space).append( + "
"); + builder.append(space).append("

").append(comment.getTitle()) + .append("

\n"); + builder.append(space).append("
") + .append(comment.getAuthor()).append("
\n"); + builder.append(space).append("
") + .append(comment.getContent()).append("
\n"); + for (Comment subComment : comment) { + appendHtml(builder, subComment, space + " "); + } + builder.append(space).append("
"); + } + + private StringBuilder appendHtml(StringBuilder builder, Story story, + boolean links) { + // TODO + builder.append("
"); + if (links) { + builder.append("

" + + story.getTitle() + "

"); + } else { + builder.append("

" + story.getTitle() + "

"); + } + builder.append("
(" + story.getDetails() + + ")
"); + builder.append("
"); + builder.append("
"); + builder.append(" " + story.getContent()); + builder.append("
"); + builder.append("
"); + + return builder; + } +} diff --git a/src/be/nikiroo/gofetch/output/Output.java b/src/be/nikiroo/gofetch/output/Output.java new file mode 100644 index 0000000..b12596e --- /dev/null +++ b/src/be/nikiroo/gofetch/output/Output.java @@ -0,0 +1,65 @@ +package be.nikiroo.gofetch.output; + +import java.util.List; + +import be.nikiroo.gofetch.data.Comment; +import be.nikiroo.gofetch.data.Story; +import be.nikiroo.gofetch.support.BasicSupport.Type; + +/** + * Base class for output operations. + * + * @author niki + */ +public abstract class Output { + /** + * The type of source, can be NULL for no-type. + */ + protected Type type; + + /** + * Create a new {@link Output} class for the given type (which can be NULL). + * + * @param type + * the type or NULL for no type + */ + public Output(Type type) { + this.type = type; + } + + /** + * Get the header to use in the index file. + * + * @return the header + */ + abstract public String getIndexHeader(); + + /** + * Get the footer to use in the index file. + * + * @return the footer + */ + abstract public String getIndexFooter(); + + /** + * Export a story (in resume mode). + * + * @param story + * the story + * + * @return the resume + */ + abstract public String export(Story story); + + /** + * Export a full story with comments. 
+ * + * @param story + * the story + * @param comments + * the comments + * + * @return the story + */ + abstract public String export(Story story, List comments); +} diff --git a/src/be/nikiroo/gofetch/support/BasicSupport.java b/src/be/nikiroo/gofetch/support/BasicSupport.java new file mode 100644 index 0000000..1105df5 --- /dev/null +++ b/src/be/nikiroo/gofetch/support/BasicSupport.java @@ -0,0 +1,81 @@ +package be.nikiroo.gofetch.support; + +import java.io.IOException; +import java.io.InputStream; +import java.net.URL; +import java.net.URLConnection; +import java.util.List; +import java.util.zip.GZIPInputStream; + +import be.nikiroo.gofetch.data.Comment; +import be.nikiroo.gofetch.data.Story; + +public abstract class BasicSupport { + public enum Type { + SLASHDOT, + } + + static private String preselector; + + private Type type; + + abstract public List list() throws IOException; + + abstract public List getComments(Story story) throws IOException; + + abstract public String getDescription(); + + public String getSelector() { + return getSelector(type); + } + + public Type getType() { + return type; + } + + protected void setType(Type type) { + this.type = type; + } + + /** + * @param preselector + * the preselector to set + */ + static public void setPreselector(String preselector) { + BasicSupport.preselector = preselector; + } + + static public BasicSupport getSupport(Type type) { + BasicSupport support = null; + + if (type != null) { + switch (type) { + case SLASHDOT: + support = new Slashdot(); + break; + } + + if (support != null) { + support.setType(type); + } + } + + return support; + } + + static public String getSelector(Type type) { + return preselector + "/" + type + "/"; + } + + // TODO: check Downloader.java? 
+	static protected InputStream open(URL url) throws IOException {
+		URLConnection conn = url.openConnection();
+		conn.connect();
+		InputStream in = conn.getInputStream();
+		if ("gzip".equals(conn.getContentEncoding())) {
+			in = new GZIPInputStream(in);
+		}
+
+		return in;
+	}
+}
diff --git a/src/be/nikiroo/gofetch/support/Slashdot.java b/src/be/nikiroo/gofetch/support/Slashdot.java
new file mode 100644
index 0000000..5dfa03b
--- /dev/null
+++ b/src/be/nikiroo/gofetch/support/Slashdot.java
@@ -0,0 +1,182 @@
+package be.nikiroo.gofetch.support;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.net.URL;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.jsoup.helper.DataUtil;
+import org.jsoup.nodes.Document;
+import org.jsoup.nodes.Element;
+import org.jsoup.select.Elements;
+
+import be.nikiroo.gofetch.data.Comment;
+import be.nikiroo.gofetch.data.Story;
+
+/**
+ * Support for Slashdot: the stories are scraped from the front page and the
+ * comments from the page of each story.
+ */
+public class Slashdot extends BasicSupport {
+	@Override
+	public String getDescription() {
+		return "Slashdot: News for nerds, stuff that matters!";
+	}
+
+	/**
+	 * List the stories of the front page: one per "header" element that
+	 * contains a "story-title" element.
+	 *
+	 * @return the stories
+	 *
+	 * @throws IOException
+	 *             in case of I/O error
+	 */
+	@Override
+	public List<Story> list() throws IOException {
+		List<Story> list = new ArrayList<Story>();
+
+		Document doc = load(new URL("https://slashdot.org/"));
+		Elements stories = doc.getElementsByTag("header");
+		for (Element story : stories) {
+			Elements titles = story.getElementsByClass("story-title");
+			if (titles.size() == 0) {
+				continue;
+			}
+			Element title = titles.get(0);
+
+			// The story id is the element id without its "title-" prefix
+			String id = "" + title.attr("id");
+			if (id.startsWith("title-")) {
+				id = id.substring("title-".length());
+			}
+
+			// First link of the title: internal URL, second one: external URL
+			Elements links = title.getElementsByTag("a");
+			String intUrl = null;
+			String extUrl = null;
+			if (links.size() > 0) {
+				intUrl = links.get(0).absUrl("href");
+			}
+			if (links.size() > 1) {
+				extUrl = links.get(1).absUrl("href");
+			}
+
+			String details = "";
+			Elements detailsElements = story.getElementsByClass("details");
+			if (detailsElements.size() > 0) {
+				details = detailsElements.get(0).text();
+			}
+
+			// The body is looked up by id in the whole document
+			String body = "";
+			Element bodyElement = doc.getElementById("text-" + id);
+			if (bodyElement != null) {
+				body = bodyElement.text();
+			}
+
+			list.add(new Story(getType(), id, title.text(), details, intUrl,
+					extUrl, body));
+		}
+
+		return list;
+	}
+
+	/**
+	 * Get the comments of the given story, read from its internal URL.
+	 *
+	 * @param story
+	 *            the story
+	 *
+	 * @return the comments, or an empty list if the page has no
+	 *         "commentlisting" element
+	 *
+	 * @throws IOException
+	 *             in case of I/O error
+	 */
+	@Override
+	public List<Comment> getComments(Story story) throws IOException {
+		List<Comment> comments = new ArrayList<Comment>();
+
+		Document doc = load(new URL(story.getUrlInternal()));
+		Element listing = doc.getElementById("commentlisting");
+		if (listing != null) {
+			comments.addAll(getComments(listing));
+		}
+
+		return comments;
+	}
+
+	/**
+	 * Download and parse a page, closing the stream once it has been read.
+	 *
+	 * @param url
+	 *            the page to load
+	 *
+	 * @return the parsed document
+	 *
+	 * @throws IOException
+	 *             in case of I/O error
+	 */
+	private static Document load(URL url) throws IOException {
+		InputStream in = open(url);
+		try {
+			return DataUtil.load(in, "UTF-8", url.toString());
+		} finally {
+			in.close();
+		}
+	}
+
+	/**
+	 * Convert the children of the given element that have the "comment"
+	 * class into comments, skipping the empty ones.
+	 */
+	private List<Comment> getComments(Element listing) {
+		List<Comment> comments = new ArrayList<Comment>();
+		for (Element commentElement : listing.children()) {
+			if (commentElement.hasClass("comment")) {
+				Comment comment = getComment(commentElement);
+				if (!comment.isEmpty()) {
+					comments.add(comment);
+				}
+			}
+		}
+		return comments;
+	}
+
+	/**
+	 * Convert a comment element, then add the sub-comments found in its
+	 * children whose id contains "commtree_".
+	 */
+	private Comment getComment(Element commentElement) {
+		String title = firstOrEmpty(commentElement, "title");
+		String author = firstOrEmpty(commentElement, "by");
+		String content = firstOrEmpty(commentElement, "commentBody");
+		String date = firstOrEmpty(commentElement, "otherdetails");
+
+		Comment comment = new Comment(commentElement.id(), author, title, date,
+				content);
+
+		for (Element child : commentElement.children()) {
+			if (child.id().contains("commtree_")) {
+				comment.addAll(getComments(child));
+			}
+		}
+
+		return comment;
+	}
+
+	/**
+	 * Get the text of the first element found with the given class, or an
+	 * empty string if there is none.
+	 */
+	private String firstOrEmpty(Element element, String className) {
+		Elements subElements = element.getElementsByClass(className);
+		if (subElements.size() > 0) {
+			return subElements.get(0).text();
+		}
+
+		return "";
+	}
+}