X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;ds=inline;f=src%2Fbe%2Fnikiroo%2Fgofetch%2Fsupport%2FBasicSupport.java;h=8fc259a19daa84d387edb1a14b3b9d2adf7583b2;hb=c9cffa913fe4ebc5cbe483cc5afe676e6cb54abd;hp=7a1d0eab9da69291bc112dcbd7f67abb127a49c6;hpb=5c056aade2e020276e039f81acba7bcb2b12e87f;p=gofetch.git diff --git a/src/be/nikiroo/gofetch/support/BasicSupport.java b/src/be/nikiroo/gofetch/support/BasicSupport.java index 7a1d0ea..8fc259a 100644 --- a/src/be/nikiroo/gofetch/support/BasicSupport.java +++ b/src/be/nikiroo/gofetch/support/BasicSupport.java @@ -1,23 +1,149 @@ package be.nikiroo.gofetch.support; import java.io.IOException; -import java.io.InputStream; -import java.net.URL; -import java.net.URLConnection; +import java.text.ParseException; +import java.text.SimpleDateFormat; +import java.util.ArrayList; +import java.util.Date; import java.util.List; -import java.util.zip.GZIPInputStream; + +import org.jsoup.helper.StringUtil; +import org.jsoup.nodes.Element; +import org.jsoup.nodes.Node; +import org.jsoup.nodes.TextNode; +import org.jsoup.select.Elements; +import org.jsoup.select.NodeTraversor; +import org.jsoup.select.NodeVisitor; import be.nikiroo.gofetch.data.Story; +import be.nikiroo.utils.Downloader; +/** + * Base class for website support. + * + * @author niki + */ public abstract class BasicSupport { + /** The downloader to use for all websites. */ + protected static Downloader downloader = new Downloader("gofetcher"); + + /** + * The support type (each website we support has a single type). + * + * @author niki + */ public enum Type { - SLASHDOT, PIPEDOT, LWN, + /** EN: Any, but mostly IT/Sci */ + SLASHDOT, + /** EN: Clone of Slashdot, mostly abandoned */ + PIPEDOT, + /** EN: Linux */ + LWN, + /** FR: Any */ + LEMONDE, + /** EN: IT */ + REGISTER, + /** FR: Linux */ + TOO_LINUX, + /** FR: IT */ + ERE_NUMERIQUE, + } + + /** + * Used to process an element into lines. 
+ * + * @author niki + */ + public interface ElementProcessor { + /** + * Detect if this node is a quote and should be treated as such. + * + * @param node + * the node to check + * @return TRUE if it is a quote + */ + public boolean detectQuote(Node node); + + /** + * Process text content (will be called on each text element, allowing + * you to modify it if needed). + * + * @param text + * the text to process + * + * @return the resulting text + */ + public String processText(String text); + + /** + * Ignore this node. + * + * @param node + * the node to ignore + * @return TRUE if it has to be ignored + */ + public boolean ignoreNode(Node node); + + /** + * Manually process this node (and return the manual processing value) + * if so desired. + *
+ * If the node is manually processed, it and its children will not be
+ * automatically processed.
+ *
+ * @param node
+ * the node to optionally process
+ *
+ * @return NULL if not processed (will thus be automatically processed
+ * as usual), a {@link String} (may be empty) if we process it
+ * manually -- the given {@link String} will be used instead of
+ * the usual automatic processing if not NULL
+ */
+ public String manualProcessing(Node node);
+ }
+
+ /**
+ * A default, stateless {@link ElementProcessor}: no quote detection, no
+ * ignored node, unchanged text, no manual processing (static nested class).
+ *
+ * @author niki
+ */
+ public static class BasicElementProcessor implements ElementProcessor {
+ @Override
+ public boolean detectQuote(Node node) {
+ return false;
+ }
+
+ @Override
+ public String processText(String text) {
+ return text;
+ }
+
+ @Override
+ public boolean ignoreNode(Node node) {
+ return false;
+ }
+
+ @Override
+ public String manualProcessing(Node node) {
+ return null;
+ }
 }
static private String preselector;
private Type type;
+ /**
+ * List all the recent items, but only guarantee the ID and internal URL
+ * needed to fetch them later on (until a {@link Story} has been fetched,
+ * the rest of it is not confirmed).
+ *
+ * @return the list of new stories
+ *
+ * @throws IOException
+ * in case of I/O error
+ */
abstract public List
+ * Should be short.
+ *
+ * @return the description
+ */
abstract public String getDescription();
+ /**
+ * The gopher "selector" to use for output for the type of this support.
+ * <p>
+ * A kind of "URL path", like "/news/" or "/misc/news/" or...
+ *
+ * @return the selector, as built by {@link #getSelector(Type)}
+ */
 public String getSelector() {
 return getSelector(type);
 }
+ /**
+ * Get the support type of this instance.
+ *
+ * @return the type
+ */
 public Type getType() {
 return type;
 }
+ /**
+ * Set the support type of this instance.
+ *
+ * @param type
+ * the new type
+ */
 protected void setType(Type type) {
 this.type = type;
 }
/**
+ * The {@link String} to prepend to the selector (the selector will be
+ * constructed as "this string" then "/type/").
+ *
* @param preselector
* the preselector to set
*/
@@ -54,6 +208,15 @@ public abstract class BasicSupport {
BasicSupport.preselector = preselector;
}
+ /**
+ * Return a {@link BasicSupport} that is compatible with the given
+ * {@link Type} if it exists (or NULL if not).
+ *
+ * @param type
+ * the type
+ *
+ * @return a compatible {@link BasicSupport} if it exists (or NULL if not)
+ */
static public BasicSupport getSupport(Type type) {
BasicSupport support = null;
@@ -68,6 +231,18 @@ public abstract class BasicSupport {
case LWN:
support = new LWN();
break;
+ case LEMONDE:
+ support = new LeMonde();
+ break;
+ case REGISTER:
+ support = new TheRegister();
+ break;
+ case TOO_LINUX:
+ support = new TooLinux();
+ break;
+ case ERE_NUMERIQUE:
+ support = new EreNumerique();
+ break;
}
if (support != null) {
@@ -78,19 +253,203 @@ public abstract class BasicSupport {
return support;
}
+ /**
+ * The gopher "selector" to use for output for this type, built as the
+ * preselector, then "/", then the type, then "/".
+ * <p>
+ * A kind of "URL path", like "/news/" or "/misc/news/" or...
+ *
+ * @param type
+ * the type to get the selector of
+ *
+ * @return the selector (starts with "null" if no preselector was set)
+ */
 static public String getSelector(Type type) {
 return preselector + "/" + type + "/";
 }
- // TODO: check Downloader.java?
- static protected InputStream open(URL url) throws IOException {
- URLConnection conn = url.openConnection();
- conn.connect();
- InputStream in = conn.getInputStream();
- if ("gzip".equals(conn.getContentEncoding())) {
- in = new GZIPInputStream(in);
+ /**
+ * Search the given {@link Element} by class name and return the first
+ * result, falling back to a new, empty span if nothing was found.
+ *
+ * @param element
+ * the element to search in
+ * @param className
+ * the class name to search for
+ *
+ * @return the first match, or a new empty span {@link Element}
+ */
+ static protected Element firstOrEmpty(Element element, String className) {
+ Elements matches = element.getElementsByClass(className);
+ if (matches.isEmpty()) {
+ return new Element("span");
+ }
+
+ return matches.get(0);
+ }
+
+ /**
+ * Search the given {@link Element} by tag name and return the first
+ * result, falling back to a new, empty span if nothing was found.
+ *
+ * @param element
+ * the element to search in
+ * @param tagName
+ * the tag name to search for
+ *
+ * @return the first match, or a new empty span {@link Element}
+ */
+ static protected Element firstOrEmptyTag(Element element, String tagName) {
+ Elements matches = element.getElementsByTag(tagName);
+ if (!matches.isEmpty()) {
+ return matches.get(0);
 }
- return in;
+ return new Element("span");
+ }
+
+ /**
+ * Process the given element into text (each line is a text paragraph and
+ * can be prepended with ">" signs to indicate a quote or sub-quote or
+ * sub-sub-quote...).
+ *
+ * @param element
+ * the element to process
+ * @param elementProcessor
+ * the element processor, must not be NULL
+ *
+ * @return text lines, each line is a paragraph
+ */
+ static protected List