Version 0.2.0: supports LWN, quotes, <br>s
[gofetch.git] / src / be / nikiroo / gofetch / support / BasicSupport.java
CommitLineData
73785268
NR
1package be.nikiroo.gofetch.support;
2
3import java.io.IOException;
4import java.io.InputStream;
5import java.net.URL;
6import java.net.URLConnection;
27008a87 7import java.util.ArrayList;
73785268
NR
8import java.util.List;
9import java.util.zip.GZIPInputStream;
10
27008a87
NR
11import org.jsoup.helper.StringUtil;
12import org.jsoup.nodes.Element;
13import org.jsoup.nodes.Node;
14import org.jsoup.nodes.TextNode;
15import org.jsoup.select.Elements;
16import org.jsoup.select.NodeTraversor;
17import org.jsoup.select.NodeVisitor;
18
73785268
NR
19import be.nikiroo.gofetch.data.Story;
20
21public abstract class BasicSupport {
22 public enum Type {
eaaeae39 23 SLASHDOT, PIPEDOT, LWN,
73785268
NR
24 }
25
27008a87
NR
26 public interface QuoteProcessor {
27 public boolean detectQuote(Node node);
28
29 public String processText(String text);
30
31 public boolean ignoreNode(Node node);
32 }
33
73785268
NR
34 static private String preselector;
35
36 private Type type;
37
38 abstract public List<Story> list() throws IOException;
39
5c056aad
NR
40 /**
41 * Fetch the full article content as well as all the comments associated to
42 * this {@link Story}, if any (can be empty, but not NULL).
43 *
44 * @param story
45 * the story to fetch the comments of
46 *
47 * @throws IOException
48 * in case of I/O error
49 */
50 abstract public void fetch(Story story) throws IOException;
73785268
NR
51
52 abstract public String getDescription();
2d95a873 53
73785268
NR
54 public String getSelector() {
55 return getSelector(type);
56 }
57
58 public Type getType() {
59 return type;
60 }
61
62 protected void setType(Type type) {
63 this.type = type;
64 }
65
66 /**
67 * @param preselector
68 * the preselector to set
69 */
70 static public void setPreselector(String preselector) {
71 BasicSupport.preselector = preselector;
72 }
73
74 static public BasicSupport getSupport(Type type) {
75 BasicSupport support = null;
76
77 if (type != null) {
78 switch (type) {
79 case SLASHDOT:
80 support = new Slashdot();
81 break;
2d95a873
NR
82 case PIPEDOT:
83 support = new Pipedot();
84 break;
eaaeae39
NR
85 case LWN:
86 support = new LWN();
87 break;
73785268
NR
88 }
89
90 if (support != null) {
91 support.setType(type);
92 }
93 }
94
95 return support;
96 }
97
98 static public String getSelector(Type type) {
99 return preselector + "/" + type + "/";
100 }
101
102 // TODO: check Downloader.java?
103 static protected InputStream open(URL url) throws IOException {
104 URLConnection conn = url.openConnection();
105 conn.connect();
106 InputStream in = conn.getInputStream();
107 if ("gzip".equals(conn.getContentEncoding())) {
108 in = new GZIPInputStream(in);
109 }
110
111 return in;
112 }
27008a87
NR
113
114 /**
115 * Get the first {@link Element} of the given class, or an empty span
116 * {@link Element} if none found.
117 *
118 * @param element
119 * the element to look in
120 * @param className
121 * the class to look for
122 *
123 * @return the value or an empty span {@link Element}
124 */
125 static protected Element firstOrEmpty(Element element, String className) {
126 Elements subElements = element.getElementsByClass(className);
127 if (subElements.size() > 0) {
128 return subElements.get(0);
129 }
130
131 return new Element("span");
132 }
133
134 /**
135 * Get the first {@link Element} of the given tag, or an empty span
136 * {@link Element} if none found.
137 *
138 * @param element
139 * the element to look in
140 * @param tagName
141 * the tag to look for
142 *
143 * @return the value or an empty span {@link Element}
144 */
145 static protected Element firstOrEmptyTag(Element element, String tagName) {
146 Elements subElements = element.getElementsByTag(tagName);
147 if (subElements.size() > 0) {
148 return subElements.get(0);
149 }
150
151 return new Element("span");
152 }
153
154 static protected List<String> toLines(Element element,
155 final QuoteProcessor quoteProcessor) {
156 final List<String> lines = new ArrayList<String>();
157 final StringBuilder currentLine = new StringBuilder();
158 final List<Integer> quoted = new ArrayList<Integer>();
159 final List<Node> ignoredNodes = new ArrayList<Node>();
160
161 if (element != null) {
162 new NodeTraversor(new NodeVisitor() {
163 @Override
164 public void head(Node node, int depth) {
165 if (quoteProcessor.ignoreNode(node)
166 || ignoredNodes.contains(node.parentNode())) {
167 ignoredNodes.add(node);
168 return;
169 }
170
171 String prep = "";
172 for (int i = 0; i < quoted.size(); i++) {
173 prep += ">";
174 }
175 prep += " ";
176
177 boolean enterQuote = quoteProcessor.detectQuote(node);
178 boolean leaveQuote = quoted.contains(depth);
179
180 if (enterQuote) {
181 quoted.add(depth);
182 }
183
184 if (leaveQuote) {
185 quoted.remove(Integer.valueOf(depth));
186 }
187
188 if (enterQuote || leaveQuote) {
189 if (currentLine.length() > 0) {
190 if (currentLine.charAt(currentLine.length() - 1) == '\n') {
191 currentLine.setLength(currentLine.length() - 1);
192 }
193 for (String l : currentLine.toString().split("\n")) {
194 lines.add(prep + l);
195 }
196 }
197 currentLine.setLength(0);
198 }
199
200 if (node instanceof Element) {
201 Element element = (Element) node;
202 boolean block = element.isBlock()
203 || element.tagName().equalsIgnoreCase("br");
204 if (block && currentLine.length() > 0) {
205 currentLine.append("\n");
206 }
207 } else if (node instanceof TextNode) {
208 TextNode textNode = (TextNode) node;
209 String line = StringUtil.normaliseWhitespace(textNode
210 .getWholeText());
211
212 currentLine.append(quoteProcessor.processText(line));
213 currentLine.append(" ");
214 }
215 }
216
217 @Override
218 public void tail(Node node, int depth) {
219 }
220 }).traverse(element);
221 }
222
223 if (currentLine.length() > 0) {
224 String prep = "";
225 for (int i = 0; i < quoted.size(); i++) {
226 prep += ">";
227 }
228 prep += " ";
229 if (currentLine.length() > 0) {
230 if (currentLine.charAt(currentLine.length() - 1) == '\n') {
231 currentLine.setLength(currentLine.length() - 1);
232 }
233 for (String l : currentLine.toString().split("\n")) {
234 lines.add(prep + l);
235 }
236 }
237 }
238
239 for (int i = 0; i < lines.size(); i++) {
240 lines.set(i, lines.get(i).replace(" ", " ").trim());
241 }
242
243 return lines;
244 }
73785268 245}