Version 0.2.0: supports LWN, quotes, <br>s
[gofetch.git] / src / be / nikiroo / gofetch / support / BasicSupport.java
index 7a1d0eab9da69291bc112dcbd7f67abb127a49c6..1db066b3d107c8fefb26218675fa8c5c0767b815 100644 (file)
@@ -4,9 +4,18 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.net.URL;
 import java.net.URLConnection;
+import java.util.ArrayList;
 import java.util.List;
 import java.util.zip.GZIPInputStream;
 
+import org.jsoup.helper.StringUtil;
+import org.jsoup.nodes.Element;
+import org.jsoup.nodes.Node;
+import org.jsoup.nodes.TextNode;
+import org.jsoup.select.Elements;
+import org.jsoup.select.NodeTraversor;
+import org.jsoup.select.NodeVisitor;
+
 import be.nikiroo.gofetch.data.Story;
 
 public abstract class BasicSupport {
@@ -14,6 +23,14 @@ public abstract class BasicSupport {
                SLASHDOT, PIPEDOT, LWN,
        }
 
+       public interface QuoteProcessor {
+               public boolean detectQuote(Node node);
+
+               public String processText(String text);
+
+               public boolean ignoreNode(Node node);
+       }
+
        static private String preselector;
 
        private Type type;
@@ -93,4 +110,136 @@ public abstract class BasicSupport {
 
                return in;
        }
+
+       /**
+        * Look for the elements of the given class in the given {@link Element}
+        * and return the first match.
+        * <p>
+        * This method never returns NULL: when nothing matches, a newly created,
+        * empty span {@link Element} is returned instead so callers can chain
+        * calls without checking the result.
+        * 
+        * @param element
+        *            the element to look in (must not be NULL)
+        * @param className
+        *            the class to look for
+        * 
+        * @return the first match, or an empty span {@link Element}
+        */
+       static protected Element firstOrEmpty(Element element, String className) {
+               Elements matches = element.getElementsByClass(className);
+               return matches.isEmpty() ? new Element("span") : matches.get(0);
+       }
+
+       /**
+        * Look for the elements of the given tag name in the given
+        * {@link Element} and return the first match.
+        * <p>
+        * This method never returns NULL: when nothing matches, a newly created,
+        * empty span {@link Element} is returned instead so callers can chain
+        * calls without checking the result.
+        * 
+        * @param element
+        *            the element to look in (must not be NULL)
+        * @param tagName
+        *            the tag to look for
+        * 
+        * @return the first match, or an empty span {@link Element}
+        */
+       static protected Element firstOrEmptyTag(Element element, String tagName) {
+               Elements matches = element.getElementsByTag(tagName);
+               if (matches.isEmpty()) {
+                       // nothing found: hand back a harmless placeholder
+                       return new Element("span");
+               }
+
+               return matches.get(0);
+       }
+
+       static protected List<String> toLines(Element element,
+                       final QuoteProcessor quoteProcessor) {
+               final List<String> lines = new ArrayList<String>();
+               final StringBuilder currentLine = new StringBuilder();
+               final List<Integer> quoted = new ArrayList<Integer>();
+               final List<Node> ignoredNodes = new ArrayList<Node>();
+
+               if (element != null) {
+                       new NodeTraversor(new NodeVisitor() {
+                               @Override
+                               public void head(Node node, int depth) {
+                                       if (quoteProcessor.ignoreNode(node)
+                                                       || ignoredNodes.contains(node.parentNode())) {
+                                               ignoredNodes.add(node);
+                                               return;
+                                       }
+
+                                       String prep = "";
+                                       for (int i = 0; i < quoted.size(); i++) {
+                                               prep += ">";
+                                       }
+                                       prep += " ";
+
+                                       boolean enterQuote = quoteProcessor.detectQuote(node);
+                                       boolean leaveQuote = quoted.contains(depth);
+
+                                       if (enterQuote) {
+                                               quoted.add(depth);
+                                       }
+
+                                       if (leaveQuote) {
+                                               quoted.remove(Integer.valueOf(depth));
+                                       }
+
+                                       if (enterQuote || leaveQuote) {
+                                               if (currentLine.length() > 0) {
+                                                       if (currentLine.charAt(currentLine.length() - 1) == '\n') {
+                                                               currentLine.setLength(currentLine.length() - 1);
+                                                       }
+                                                       for (String l : currentLine.toString().split("\n")) {
+                                                               lines.add(prep + l);
+                                                       }
+                                               }
+                                               currentLine.setLength(0);
+                                       }
+
+                                       if (node instanceof Element) {
+                                               Element element = (Element) node;
+                                               boolean block = element.isBlock()
+                                                               || element.tagName().equalsIgnoreCase("br");
+                                               if (block && currentLine.length() > 0) {
+                                                       currentLine.append("\n");
+                                               }
+                                       } else if (node instanceof TextNode) {
+                                               TextNode textNode = (TextNode) node;
+                                               String line = StringUtil.normaliseWhitespace(textNode
+                                                               .getWholeText());
+
+                                               currentLine.append(quoteProcessor.processText(line));
+                                               currentLine.append(" ");
+                                       }
+                               }
+
+                               @Override
+                               public void tail(Node node, int depth) {
+                               }
+                       }).traverse(element);
+               }
+
+               if (currentLine.length() > 0) {
+                       String prep = "";
+                       for (int i = 0; i < quoted.size(); i++) {
+                               prep += ">";
+                       }
+                       prep += " ";
+                       if (currentLine.length() > 0) {
+                               if (currentLine.charAt(currentLine.length() - 1) == '\n') {
+                                       currentLine.setLength(currentLine.length() - 1);
+                               }
+                               for (String l : currentLine.toString().split("\n")) {
+                                       lines.add(prep + l);
+                               }
+                       }
+               }
+
+               for (int i = 0; i < lines.size(); i++) {
+                       lines.set(i, lines.get(i).replace("  ", " ").trim());
+               }
+
+               return lines;
+       }
 }