First version (slashdot supported)
[gofetch.git] / src / be / nikiroo / gofetch / Fetcher.java
CommitLineData
73785268
NR
1package be.nikiroo.gofetch;
2
3import java.io.File;
4import java.io.FileWriter;
5import java.io.FilenameFilter;
6import java.io.IOException;
7import java.util.Arrays;
8import java.util.List;
9
10import be.nikiroo.gofetch.data.Comment;
11import be.nikiroo.gofetch.data.Story;
12import be.nikiroo.gofetch.output.Gopher;
13import be.nikiroo.gofetch.output.Html;
14import be.nikiroo.gofetch.output.Output;
15import be.nikiroo.gofetch.support.BasicSupport;
16import be.nikiroo.gofetch.support.BasicSupport.Type;
17import be.nikiroo.utils.IOUtils;
18
19/**
20 * The class that will manage the fetch operations.
21 * <p>
22 * It will scrap the required websites and process them to disk.
23 *
24 * @author niki
25 */
26public class Fetcher {
27 private File dir;
28 private String preselector;
29 private int maxStories;
30 private String hostname;
31 private int port;
32 private Type type;
33
34 /**
35 * Prepare a new {@link Fetcher}.
36 *
37 * @param dir
38 * the target directory where to save the files (won't have
39 * impact on the files' content)
40 * @param preselector
41 * the sub directory and (pre-)selector to use for the resources
42 * (<b>will</b> have an impact on the files' content)
43 * @param type
44 * the type of news to get (or the special keyword ALL to get all
45 * of the supported sources)
46 * @param maxStories
47 * the maximum number of stories to show on the resume page
48 * @param hostname
49 * the gopher host to use (<b>will</b> have an impact on the
50 * files' content)
51 * @param port
52 * the gopher port to use (<b>will</b> have an impact on the
53 * files' content)
54 */
55 public Fetcher(File dir, String preselector, Type type, int maxStories,
56 String hostname, int port) {
57 this.dir = dir;
58 this.preselector = preselector;
59 this.type = type;
60 this.maxStories = maxStories;
61 this.hostname = hostname;
62 this.port = port;
63 }
64
65 /**
66 * Start the fetching operation.
67 * <p>
68 * This method will handle the main pages itself, and will call
69 * {@link Fetcher#list(BasicSupport)} for the stories.
70 *
71 * @throws IOException
72 * in case of I/O error
73 */
74 public void start() throws IOException {
75 File cache = new File(dir, preselector);
76 cache.mkdirs();
77 File cacheHtml = new File(cache, "index.html");
78 cache = new File(cache, ".cache");
79
80 Output gopher = new Gopher(null, hostname, port);
81 Output html = new Html(null);
82
83 FileWriter writer = new FileWriter(cache);
84 try {
85 FileWriter writerHtml = new FileWriter(cacheHtml);
86 try {
87 writer.append(gopher.getIndexHeader());
88 writerHtml.append(html.getIndexHeader());
89
90 Type types[];
91 if (type == null) {
92 types = Type.values();
93 } else {
94 types = new Type[] { type };
95 }
96
97 BasicSupport.setPreselector(preselector);
98 for (Type type : types) {
99 BasicSupport support = BasicSupport.getSupport(type);
100 list(support);
101
102 writer.append("1" + support.getDescription()).append("\t")
103 .append("1" + support.getSelector()) //
104 .append("\t").append(hostname) //
105 .append("\t").append(Integer.toString(port)) //
106 .append("\r\n");
107 String ref = support.getSelector();
108 while (ref.startsWith("/")) {
109 ref = ref.substring(1);
110 }
111 writerHtml.append("<div class='site'><a href='../" + ref
112 + "'>" + support.getDescription() + "</a></div>");
113 }
114
115 writer.append(gopher.getIndexFooter());
116 writerHtml.append(html.getIndexFooter());
117 } finally {
118 writerHtml.close();
119 }
120 } finally {
121 writer.close();
122 }
123 }
124
125 /**
126 * Process the stories for the given {@link BasicSupport} to disk.
127 *
128 * @param support
129 * the {@link BasicSupport} to download from
130 *
131 * @throws IOException
132 * in case of I/O error
133 **/
134 private void list(BasicSupport support) throws IOException {
135 Output gopher = new Gopher(support.getType(), hostname, port);
136 Output html = new Html(support.getType());
137
138 new File(dir, support.getSelector()).mkdirs();
139
140 System.err
141 .print("Listing recent news for " + support.getType() + "...");
142 List<Story> stories = support.list();
143 System.err.println(" " + stories.size() + " stories found!");
144 int i = 1;
145 for (Story story : stories) {
146 IOUtils.writeSmallFile(dir, story.getSelector() + ".header",
147 gopher.export(story));
148 IOUtils.writeSmallFile(dir, story.getSelector() + ".header.html",
149 html.export(story));
150
151 System.err.println(String.format("%02d/%02d", i, stories.size())
152 + " Fetching comments for story " + story.getId() + "...");
153 List<Comment> comments = support.getComments(story);
154
155 IOUtils.writeSmallFile(dir, story.getSelector(),
156 gopher.export(story, comments));
157 IOUtils.writeSmallFile(dir, story.getSelector() + ".html",
158 html.export(story, comments));
159
160 i++;
161 }
162
163 File varDir = new File(dir, support.getSelector());
164 String[] headers = varDir.list(new FilenameFilter() {
165 @Override
166 public boolean accept(File dir, String name) {
167 return name.endsWith(".header");
168 }
169 });
170
171 File cache = new File(varDir, ".cache");
172 File cacheHtml = new File(varDir, "index.html");
173 FileWriter writer = new FileWriter(cache);
174 try {
175 FileWriter writerHtml = new FileWriter(cacheHtml);
176 try {
177 if (headers.length > 0) {
178 Arrays.sort(headers);
179 int from = headers.length - 1;
180 int to = headers.length - maxStories;
181 if (to < 0) {
182 to = 0;
183 }
184 for (i = from; i >= to; i--) {
185 writer.append(IOUtils.readSmallFile(new File(varDir,
186 headers[i])));
187
188 writerHtml.append(IOUtils.readSmallFile(new File(
189 varDir, headers[i] + ".html")));
190 }
191 }
192 } finally {
193 writerHtml.close();
194 }
195 } finally {
196 writer.close();
197 }
198 }
199}