X-Git-Url: http://git.nikiroo.be/?p=gofetch.git;a=blobdiff_plain;f=src%2Fbe%2Fnikiroo%2Fgofetch%2Fsupport%2FBasicSupport.java;h=1db066b3d107c8fefb26218675fa8c5c0767b815;hp=7a1d0eab9da69291bc112dcbd7f67abb127a49c6;hb=27008a8782c0ed96e07c8dc39ff0ed1f5163a9d0;hpb=93e09a08a68ffd69eed42ecbf95f317b518357d7 diff --git a/src/be/nikiroo/gofetch/support/BasicSupport.java b/src/be/nikiroo/gofetch/support/BasicSupport.java index 7a1d0ea..1db066b 100644 --- a/src/be/nikiroo/gofetch/support/BasicSupport.java +++ b/src/be/nikiroo/gofetch/support/BasicSupport.java @@ -4,9 +4,18 @@ import java.io.IOException; import java.io.InputStream; import java.net.URL; import java.net.URLConnection; +import java.util.ArrayList; import java.util.List; import java.util.zip.GZIPInputStream; +import org.jsoup.helper.StringUtil; +import org.jsoup.nodes.Element; +import org.jsoup.nodes.Node; +import org.jsoup.nodes.TextNode; +import org.jsoup.select.Elements; +import org.jsoup.select.NodeTraversor; +import org.jsoup.select.NodeVisitor; + import be.nikiroo.gofetch.data.Story; public abstract class BasicSupport { @@ -14,6 +23,14 @@ public abstract class BasicSupport { SLASHDOT, PIPEDOT, LWN, } + public interface QuoteProcessor { + public boolean detectQuote(Node node); + + public String processText(String text); + + public boolean ignoreNode(Node node); + } + static private String preselector; private Type type; @@ -93,4 +110,136 @@ public abstract class BasicSupport { return in; } + + /** + * Get the first {@link Element} of the given class, or an empty span + * {@link Element} if none found. + * + * @param element + * the element to look in + * @param className + * the class to look for + * + * @return the value or an empty span {@link Element} + */ + static protected Element firstOrEmpty(Element element, String className) { + Elements subElements = element.getElementsByClass(className); + if (subElements.size() > 0) { + return subElements.get(0); + } + + return new Element("span"); + } + + /** + * Get the first {@link Element} of the given tag, or an empty span + * {@link Element} if none found. + * + * @param element + * the element to look in + * @param tagName + * the tag to look for + * + * @return the value or an empty span {@link Element} + */ + static protected Element firstOrEmptyTag(Element element, String tagName) { + Elements subElements = element.getElementsByTag(tagName); + if (subElements.size() > 0) { + return subElements.get(0); + } + + return new Element("span"); + } + + static protected List toLines(Element element, + final QuoteProcessor quoteProcessor) { + final List lines = new ArrayList(); + final StringBuilder currentLine = new StringBuilder(); + final List quoted = new ArrayList(); + final List ignoredNodes = new ArrayList(); + + if (element != null) { + new NodeTraversor(new NodeVisitor() { + @Override + public void head(Node node, int depth) { + if (quoteProcessor.ignoreNode(node) + || ignoredNodes.contains(node.parentNode())) { + ignoredNodes.add(node); + return; + } + + String prep = ""; + for (int i = 0; i < quoted.size(); i++) { + prep += ">"; + } + prep += " "; + + boolean enterQuote = quoteProcessor.detectQuote(node); + boolean leaveQuote = quoted.contains(depth); + + if (enterQuote) { + quoted.add(depth); + } + + if (leaveQuote) { + quoted.remove(Integer.valueOf(depth)); + } + + if (enterQuote || leaveQuote) { + if (currentLine.length() > 0) { + if (currentLine.charAt(currentLine.length() - 1) == '\n') { + currentLine.setLength(currentLine.length() - 1); + } + for (String l : currentLine.toString().split("\n")) { + lines.add(prep + l); + } + } + currentLine.setLength(0); + } + + if (node instanceof Element) { + Element element = (Element) node; + boolean block = element.isBlock() + || element.tagName().equalsIgnoreCase("br"); + if (block && currentLine.length() > 0) { + currentLine.append("\n"); + } + } else if (node instanceof TextNode) { + TextNode textNode = (TextNode) node; + String line = StringUtil.normaliseWhitespace(textNode + .getWholeText()); + + currentLine.append(quoteProcessor.processText(line)); + currentLine.append(" "); + } + } + + @Override + public void tail(Node node, int depth) { + } + }).traverse(element); + } + + if (currentLine.length() > 0) { + String prep = ""; + for (int i = 0; i < quoted.size(); i++) { + prep += ">"; + } + prep += " "; + if (currentLine.length() > 0) { + if (currentLine.charAt(currentLine.length() - 1) == '\n') { + currentLine.setLength(currentLine.length() - 1); + } + for (String l : currentLine.toString().split("\n")) { + lines.add(prep + l); + } + } + } + + for (int i = 0; i < lines.size(); i++) { + lines.set(i, lines.get(i).replace(" ", " ").trim()); + } + + return lines; + } }