X-Git-Url: https://git.nikiroo.be/?a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Fgofetch%2Fsupport%2FBasicSupport.java;h=8fc259a19daa84d387edb1a14b3b9d2adf7583b2;hb=c9cffa913fe4ebc5cbe483cc5afe676e6cb54abd;hp=102023eb051a02b6c13a41e5ddc0d64e98743156;hpb=100a839503d23e324d2db3f6d3e47892def3bf81;p=gofetch.git diff --git a/src/be/nikiroo/gofetch/support/BasicSupport.java b/src/be/nikiroo/gofetch/support/BasicSupport.java index 102023e..8fc259a 100644 --- a/src/be/nikiroo/gofetch/support/BasicSupport.java +++ b/src/be/nikiroo/gofetch/support/BasicSupport.java @@ -1,12 +1,11 @@ package be.nikiroo.gofetch.support; import java.io.IOException; -import java.io.InputStream; -import java.net.URL; -import java.net.URLConnection; +import java.text.ParseException; +import java.text.SimpleDateFormat; import java.util.ArrayList; +import java.util.Date; import java.util.List; -import java.util.zip.GZIPInputStream; import org.jsoup.helper.StringUtil; import org.jsoup.nodes.Element; @@ -17,31 +16,120 @@ import org.jsoup.select.NodeTraversor; import org.jsoup.select.NodeVisitor; import be.nikiroo.gofetch.data.Story; +import be.nikiroo.utils.Downloader; +/** + * Base class for website support. + * + * @author niki + */ public abstract class BasicSupport { + /** The downloader to use for all websites. */ + protected static Downloader downloader = new Downloader("gofetcher"); + + /** + * The support type (each website we support has a single type). + * + * @author niki + */ public enum Type { - SLASHDOT, PIPEDOT, LWN, LEMONDE, + /** EN: Any, but mostly IT/Sci */ + SLASHDOT, + /** EN: Clone of Slashdot, mostly abandoned */ + PIPEDOT, + /** EN: Linux */ + LWN, + /** FR: Any */ + LEMONDE, + /** EN: IT */ + REGISTER, + /** FR: Linux */ + TOO_LINUX, + /** FR: IT */ + ERE_NUMERIQUE, } - public interface QuoteProcessor { + /** + * Used to process an element into lines. 
+ * + * @author niki + */ + public interface ElementProcessor { + /** + * Detect if this node is a quote and should be treated as such. + * + * @param node + * the node to check + * @return TRUE if it is + */ public boolean detectQuote(Node node); + /** + * Process text content (will be called on each text element, allowing + * you to modify it if needed). + * + * @param text + * the text to process + * + * @return the resulting text + */ public String processText(String text); + /** + * Ignore this node. + * + * @param node + * the node to ignore + * @return TRUE if it has to be ignored + */ public boolean ignoreNode(Node node); /** - * Manually process this node if so desired. + * Manually process this node (and return the manual processing value) + * if so desired. + *
+ * If the node is manually processed, it and its children will not be + * automatically processed. * * @param node * the node to optionally process * - * @return NULL if not processed, a {@link String} (may be empty) if we - * must not process it any further + * @return NULL if not processed (will thus be automatically processed + * as usual), a {@link String} (may be empty) if we process it + * manually -- the given {@link String} will be used instead of + * the usual automatic processing if not NULL */ public String manualProcessing(Node node); } + /** + * A default {@link ElementProcessor} (will not detect or process anything + * manually). + * + * @author niki + */ + public class BasicElementProcessor implements ElementProcessor { + @Override + public boolean detectQuote(Node node) { + return false; + } + + @Override + public String processText(String text) { + return text; + } + + @Override + public boolean ignoreNode(Node node) { + return false; + } + + @Override + public String manualProcessing(Node node) { + return null; + } + } + static private String preselector; private Type type; @@ -70,21 +158,49 @@ public abstract class BasicSupport { */ abstract public void fetch(Story story) throws IOException; + /** + * The website textual description, to add in the dispatcher page. + *
+ * Should be short. + * + * @return the description + */ abstract public String getDescription(); + /** + * The gopher "selector" to use for output. + *
+ * A kind of "URL path", like "/news/" or "/misc/news/" or... + * + * @return the selector + */ public String getSelector() { return getSelector(type); } + /** + * The support type. + * + * @return the type + */ public Type getType() { return type; } + /** + * The support type. + * + * @param type + * the new type + */ protected void setType(Type type) { this.type = type; } /** + * The {@link String} to prepend to the selector (the selector will be + * constructed as "this string" then "/type/"). + * * @param preselector * the preselector to set */ @@ -92,6 +208,15 @@ public abstract class BasicSupport { BasicSupport.preselector = preselector; } + /** + * Return a {@link BasicSupport} that is compatible with the given + * {@link Type} if it exists (or NULL if not). + * + * @param type + * the type + * + * @return a compatible {@link BasicSupport} if it exists (or NULL if not) + */ static public BasicSupport getSupport(Type type) { BasicSupport support = null; @@ -109,6 +234,15 @@ public abstract class BasicSupport { case LEMONDE: support = new LeMonde(); break; + case REGISTER: + support = new TheRegister(); + break; + case TOO_LINUX: + support = new TooLinux(); + break; + case ERE_NUMERIQUE: + support = new EreNumerique(); + break; } if (support != null) { @@ -119,22 +253,21 @@ public abstract class BasicSupport { return support; } + /** + * The gopher "selector" to use for output for this type, using the + * preselector. + *
+ * A kind of "URL path", like "/news/" or "/misc/news/" or...
+ *
+ * @param type
+ * the type to get the selector of
+ *
+ * @return the selector
+ */
static public String getSelector(Type type) {
return preselector + "/" + type + "/";
}
- // TODO: check Downloader.java?
- static protected InputStream open(URL url) throws IOException {
- URLConnection conn = url.openConnection();
- conn.connect();
- InputStream in = conn.getInputStream();
- if ("gzip".equals(conn.getContentEncoding())) {
- in = new GZIPInputStream(in);
- }
-
- return in;
- }
-
/**
* Get the first {@link Element} of the given class, or an empty span
* {@link Element} if none found.
@@ -175,8 +308,20 @@ public abstract class BasicSupport {
return new Element("span");
}
+ /**
+ * Process the given element into text (each line is a text paragraph and
+ * can be prepended with ">" signs to indicate a quote or sub-quote or
+ * sub-sub-quote...).
+ *
+ * @param element
+ * the element to process
+ * @param elementProcessor
+ * the element processor, must not be NULL
+ *
+ * @return text lines, each line is a paragraph
+ */
static protected List