Update to the gopher mini-test service
[gofetch.git] / src / be / nikiroo / gofetch / support / BasicSupport.java
1 package be.nikiroo.gofetch.support;
2
3 import java.io.IOException;
4 import java.io.InputStream;
5 import java.net.URL;
6 import java.net.URLConnection;
7 import java.util.ArrayList;
8 import java.util.List;
9 import java.util.zip.GZIPInputStream;
10
11 import org.jsoup.helper.StringUtil;
12 import org.jsoup.nodes.Element;
13 import org.jsoup.nodes.Node;
14 import org.jsoup.nodes.TextNode;
15 import org.jsoup.select.Elements;
16 import org.jsoup.select.NodeTraversor;
17 import org.jsoup.select.NodeVisitor;
18
19 import be.nikiroo.gofetch.data.Story;
20
21 public abstract class BasicSupport {
22 public enum Type {
23 SLASHDOT, PIPEDOT, LWN, LEMONDE,
24 }
25
26 public interface QuoteProcessor {
27 public boolean detectQuote(Node node);
28
29 public String processText(String text);
30
31 public boolean ignoreNode(Node node);
32
33 /**
34 * Manually process this node if so desired.
35 *
36 * @param node
37 * the node to optionally process
38 *
39 * @return NULL if not processed, a {@link String} (may be empty) if we
40 * must not process it any further
41 */
42 public String manualProcessing(Node node);
43 }
44
45 static private String preselector;
46
47 private Type type;
48
49 /**
50 * List all the recent items, but only assure the ID and internal URL to
51 * fetch it later on (until it has been fetched, the rest of the
52 * {@link Story} is not confirmed).
53 *
54 * @return the list of new stories
55 *
56 * @throws IOException
57 * in case of I/O
58 */
59 abstract public List<Story> list() throws IOException;
60
61 /**
62 * Fetch the full article content as well as all the comments associated to
63 * this {@link Story}, if any (can be empty, but not NULL).
64 *
65 * @param story
66 * the story to fetch the comments of
67 *
68 * @throws IOException
69 * in case of I/O error
70 */
71 abstract public void fetch(Story story) throws IOException;
72
73 abstract public String getDescription();
74
75 public String getSelector() {
76 return getSelector(type);
77 }
78
79 public Type getType() {
80 return type;
81 }
82
83 protected void setType(Type type) {
84 this.type = type;
85 }
86
87 /**
88 * @param preselector
89 * the preselector to set
90 */
91 static public void setPreselector(String preselector) {
92 BasicSupport.preselector = preselector;
93 }
94
95 static public BasicSupport getSupport(Type type) {
96 BasicSupport support = null;
97
98 if (type != null) {
99 switch (type) {
100 case SLASHDOT:
101 support = new Slashdot();
102 break;
103 case PIPEDOT:
104 support = new Pipedot();
105 break;
106 case LWN:
107 support = new LWN();
108 break;
109 case LEMONDE:
110 support = new LeMonde();
111 break;
112 }
113
114 if (support != null) {
115 support.setType(type);
116 }
117 }
118
119 return support;
120 }
121
122 static public String getSelector(Type type) {
123 return preselector + "/" + type + "/";
124 }
125
126 // TODO: check Downloader.java?
127 static protected InputStream open(URL url) throws IOException {
128 URLConnection conn = url.openConnection();
129 conn.connect();
130 InputStream in = conn.getInputStream();
131 if ("gzip".equals(conn.getContentEncoding())) {
132 in = new GZIPInputStream(in);
133 }
134
135 return in;
136 }
137
138 /**
139 * Get the first {@link Element} of the given class, or an empty span
140 * {@link Element} if none found.
141 *
142 * @param element
143 * the element to look in
144 * @param className
145 * the class to look for
146 *
147 * @return the value or an empty span {@link Element}
148 */
149 static protected Element firstOrEmpty(Element element, String className) {
150 Elements subElements = element.getElementsByClass(className);
151 if (subElements.size() > 0) {
152 return subElements.get(0);
153 }
154
155 return new Element("span");
156 }
157
158 /**
159 * Get the first {@link Element} of the given tag, or an empty span
160 * {@link Element} if none found.
161 *
162 * @param element
163 * the element to look in
164 * @param tagName
165 * the tag to look for
166 *
167 * @return the value or an empty span {@link Element}
168 */
169 static protected Element firstOrEmptyTag(Element element, String tagName) {
170 Elements subElements = element.getElementsByTag(tagName);
171 if (subElements.size() > 0) {
172 return subElements.get(0);
173 }
174
175 return new Element("span");
176 }
177
178 static protected List<String> toLines(Element element,
179 final QuoteProcessor quoteProcessor) {
180 final List<String> lines = new ArrayList<String>();
181 final StringBuilder currentLine = new StringBuilder();
182 final List<Integer> quoted = new ArrayList<Integer>();
183 final List<Node> ignoredNodes = new ArrayList<Node>();
184
185 if (element != null) {
186 new NodeTraversor(new NodeVisitor() {
187 @Override
188 public void head(Node node, int depth) {
189 String manual = null;
190 boolean ignore = quoteProcessor.ignoreNode(node)
191 || ignoredNodes.contains(node.parentNode());
192 if (!ignore) {
193 manual = quoteProcessor.manualProcessing(node);
194 if (manual != null) {
195 currentLine.append(manual);
196 ignore = true;
197 }
198 }
199
200 if (ignore) {
201 ignoredNodes.add(node);
202 return;
203 }
204
205 String prep = "";
206 for (int i = 0; i < quoted.size(); i++) {
207 prep += ">";
208 }
209 prep += " ";
210
211 boolean enterQuote = quoteProcessor.detectQuote(node);
212 boolean leaveQuote = quoted.contains(depth);
213
214 if (enterQuote) {
215 quoted.add(depth);
216 }
217
218 if (leaveQuote) {
219 quoted.remove(Integer.valueOf(depth));
220 }
221
222 if (enterQuote || leaveQuote) {
223 if (currentLine.length() > 0) {
224 if (currentLine.charAt(currentLine.length() - 1) == '\n') {
225 currentLine.setLength(currentLine.length() - 1);
226 }
227 for (String l : currentLine.toString().split("\n")) {
228 lines.add(prep + l);
229 }
230 }
231 currentLine.setLength(0);
232 }
233
234 if (node instanceof Element) {
235 Element element = (Element) node;
236 boolean block = element.isBlock()
237 || element.tagName().equalsIgnoreCase("br");
238 if (block && currentLine.length() > 0) {
239 currentLine.append("\n");
240 }
241 } else if (node instanceof TextNode) {
242 TextNode textNode = (TextNode) node;
243 String line = StringUtil.normaliseWhitespace(textNode
244 .getWholeText());
245
246 currentLine.append(quoteProcessor.processText(line));
247 currentLine.append(" ");
248 }
249 }
250
251 @Override
252 public void tail(Node node, int depth) {
253 }
254 }).traverse(element);
255 }
256
257 if (currentLine.length() > 0) {
258 String prep = "";
259 for (int i = 0; i < quoted.size(); i++) {
260 prep += ">";
261 }
262 prep += " ";
263 if (currentLine.length() > 0) {
264 if (currentLine.charAt(currentLine.length() - 1) == '\n') {
265 currentLine.setLength(currentLine.length() - 1);
266 }
267 for (String l : currentLine.toString().split("\n")) {
268 lines.add(prep + l);
269 }
270 }
271 }
272
273 for (int i = 0; i < lines.size(); i++) {
274 lines.set(i, lines.get(i).replace(" ", " ").trim());
275 }
276
277 return lines;
278 }
279 }