X-Git-Url: http://git.nikiroo.be/?a=blobdiff_plain;ds=sidebyside;f=src%2Fbe%2Fnikiroo%2Fgofetch%2Fsupport%2FBasicSupport.java;h=615c72d6b0cbd81c4f59d8472878ea07ef4b730a;hb=eda7789b51a8df0ea85c1f8e93e30bbb5115219a;hp=1db066b3d107c8fefb26218675fa8c5c0767b815;hpb=27008a8782c0ed96e07c8dc39ff0ed1f5163a9d0;p=gofetch.git diff --git a/src/be/nikiroo/gofetch/support/BasicSupport.java b/src/be/nikiroo/gofetch/support/BasicSupport.java index 1db066b..615c72d 100644 --- a/src/be/nikiroo/gofetch/support/BasicSupport.java +++ b/src/be/nikiroo/gofetch/support/BasicSupport.java @@ -1,12 +1,8 @@ package be.nikiroo.gofetch.support; import java.io.IOException; -import java.io.InputStream; -import java.net.URL; -import java.net.URLConnection; import java.util.ArrayList; import java.util.List; -import java.util.zip.GZIPInputStream; import org.jsoup.helper.StringUtil; import org.jsoup.nodes.Element; @@ -17,24 +13,109 @@ import org.jsoup.select.NodeTraversor; import org.jsoup.select.NodeVisitor; import be.nikiroo.gofetch.data.Story; +import be.nikiroo.utils.Downloader; public abstract class BasicSupport { + protected static Downloader downloader = new Downloader("gofetcher"); + public enum Type { - SLASHDOT, PIPEDOT, LWN, + SLASHDOT, PIPEDOT, LWN, LEMONDE, } - public interface QuoteProcessor { + /** + * Used to process an element into lines. + * + * @author niki + */ + public interface ElementProcessor { + /** + * Detect if this node is a quote and should be treated as such. + * + * @param node + * the node to check + * @return TRUE if it is + */ public boolean detectQuote(Node node); + /** + * Process text content (will be called on each text element, allowing + * you to modify it if needed). + * + * @param text + * the text to process + * @return the processed text + */ public String processText(String text); + /** + * Ignore this node.
+ * + * @param node + * the node to ignore + * @return TRUE if it has to be ignored + */ public boolean ignoreNode(Node node); + + /** + * Manually process this node (and return the manual processing value) + * if so desired. + *
+ * If the node is manually processed, it and its children will not be
+ * automatically processed.
+ *
+ * @param node
+ * the node to optionally process
+ *
+ * @return NULL if not processed (will thus be automatically processed
+ * as usual), a {@link String} (may be empty) if we process it
+ * manually -- the given {@link String} will be used instead of
+ * the usual automatic processing if not NULL
+ */
+ public String manualProcessing(Node node);
+ }
+
+ /**
+ * A default {@link ElementProcessor}: it detects no quote, ignores no
+ * node, returns the text unchanged and never processes a node manually.
+ * <p>
+ * It is a {@code static} nested class because it uses nothing from the
+ * enclosing instance; it can serve as a base for processors that only
+ * need to override some of the methods.
+ *
+ * @author niki
+ */
+ public static class BasicElementProcessor implements ElementProcessor {
+ /**
+ * @param node
+ * the node to check (not used)
+ * @return always FALSE
+ */
+ @Override
+ public boolean detectQuote(Node node) {
+ return false;
+ }
+
+ /**
+ * @param text
+ * the text to process
+ * @return the given text, unchanged
+ */
+ @Override
+ public String processText(String text) {
+ return text;
+ }
+
+ /**
+ * @param node
+ * the node to check (not used)
+ * @return always FALSE
+ */
+ @Override
+ public boolean ignoreNode(Node node) {
+ return false;
+ }
+
+ /**
+ * @param node
+ * the node to optionally process (not used)
+ * @return always NULL (no manual processing)
+ */
+ @Override
+ public String manualProcessing(Node node) {
+ return null;
+ }
 }
static private String preselector;
private Type type;
+ /**
+ * List all the recent items, but only assure the ID and internal URL to
+ * fetch it later on (until it has been fetched, the rest of the
+ * {@link Story} is not confirmed).
+ *
+ * @return the list of new stories
+ *
+ * @throws IOException
+ * in case of I/O
+ */
abstract public List