.cache -> gophermap
[gofetch.git] / src / be / nikiroo / gofetch / Fetcher.java
CommitLineData
73785268
NR
1package be.nikiroo.gofetch;
2
3import java.io.File;
4import java.io.FileWriter;
5import java.io.FilenameFilter;
6import java.io.IOException;
e402343e 7import java.util.ArrayList;
73785268 8import java.util.Arrays;
e402343e 9import java.util.Collections;
73785268
NR
10import java.util.List;
11
73785268
NR
12import be.nikiroo.gofetch.data.Story;
13import be.nikiroo.gofetch.output.Gopher;
14import be.nikiroo.gofetch.output.Html;
15import be.nikiroo.gofetch.output.Output;
16import be.nikiroo.gofetch.support.BasicSupport;
17import be.nikiroo.gofetch.support.BasicSupport.Type;
18import be.nikiroo.utils.IOUtils;
19
20/**
21 * The class that will manage the fetch operations.
22 * <p>
23 * It will scrap the required websites and process them to disk.
24 *
25 * @author niki
26 */
27public class Fetcher {
28 private File dir;
29 private String preselector;
30 private int maxStories;
31 private String hostname;
32 private int port;
33 private Type type;
34
35 /**
36 * Prepare a new {@link Fetcher}.
37 *
38 * @param dir
39 * the target directory where to save the files (won't have
40 * impact on the files' content)
41 * @param preselector
42 * the sub directory and (pre-)selector to use for the resources
43 * (<b>will</b> have an impact on the files' content)
44 * @param type
45 * the type of news to get (or the special keyword ALL to get all
46 * of the supported sources)
47 * @param maxStories
48 * the maximum number of stories to show on the resume page
49 * @param hostname
50 * the gopher host to use (<b>will</b> have an impact on the
51 * files' content)
52 * @param port
53 * the gopher port to use (<b>will</b> have an impact on the
54 * files' content)
55 */
56 public Fetcher(File dir, String preselector, Type type, int maxStories,
57 String hostname, int port) {
58 this.dir = dir;
59 this.preselector = preselector;
60 this.type = type;
61 this.maxStories = maxStories;
62 this.hostname = hostname;
63 this.port = port;
64 }
65
66 /**
67 * Start the fetching operation.
68 * <p>
69 * This method will handle the main pages itself, and will call
70 * {@link Fetcher#list(BasicSupport)} for the stories.
71 *
72 * @throws IOException
73 * in case of I/O error
74 */
75 public void start() throws IOException {
5c056aad
NR
76 StringBuilder gopherBuilder = new StringBuilder();
77 StringBuilder htmlBuilder = new StringBuilder();
78
79 BasicSupport.setPreselector(preselector);
80 for (Type type : Type.values()) {
81 BasicSupport support = BasicSupport.getSupport(type);
82
83 if (type == this.type || this.type == null) {
84 list(support);
85 }
86
e402343e 87 gopherBuilder.append(getLink(support.getDescription(),
588b54b8 88 support.getSelector(), true, false));
5c056aad
NR
89
90 String ref = support.getSelector();
91 while (ref.startsWith("/")) {
92 ref = ref.substring(1);
93 }
e402343e
NR
94 ref = "../" + ref + "/index.html";
95
588b54b8
NR
96 htmlBuilder.append(getLink(support.getDescription(), ref, false,
97 true));
5c056aad
NR
98 }
99
100 File gopherCache = new File(dir, preselector);
101 gopherCache.mkdirs();
102 File htmlIndex = new File(gopherCache, "index.html");
93e09a08 103 gopherCache = new File(gopherCache, "gophermap");
73785268 104
70b18499
NR
105 Output gopher = new Gopher(null, hostname, preselector, port);
106 Output html = new Html(null, hostname, preselector, port);
73785268 107
5c056aad 108 FileWriter writer = new FileWriter(gopherCache);
73785268 109 try {
5c056aad
NR
110 writer.append(gopher.getIndexHeader());
111 writer.append(gopherBuilder.toString());
112 writer.append(gopher.getIndexFooter());
113 } finally {
114 writer.close();
115 }
116
117 try {
118 writer = new FileWriter(htmlIndex);
119 writer.append(html.getIndexHeader());
120 writer.append(htmlBuilder.toString());
121 writer.append(html.getIndexFooter());
73785268
NR
122 } finally {
123 writer.close();
124 }
125 }
126
127 /**
128 * Process the stories for the given {@link BasicSupport} to disk.
129 *
130 * @param support
131 * the {@link BasicSupport} to download from
132 *
133 * @throws IOException
134 * in case of I/O error
135 **/
136 private void list(BasicSupport support) throws IOException {
5c056aad
NR
137 // Get stories:
138 System.err
139 .print("Listing recent news for " + support.getType() + "...");
140 List<Story> stories = support.list();
141 System.err.println(" " + stories.size() + " stories found!");
142
143 // Get comments (and update stories if needed):
144 int i = 1;
145 for (Story story : stories) {
146 System.err.println(String.format("%02d/%02d", i, stories.size())
147 + " Fetching full story " + story.getId() + "...");
148 support.fetch(story);
149 i++;
150 }
151
70b18499
NR
152 Output gopher = new Gopher(support.getType(), hostname, preselector,
153 port);
154 Output html = new Html(support.getType(), hostname, preselector, port);
73785268
NR
155
156 new File(dir, support.getSelector()).mkdirs();
157
73785268
NR
158 for (Story story : stories) {
159 IOUtils.writeSmallFile(dir, story.getSelector() + ".header",
5c056aad 160 gopher.exportHeader(story));
73785268 161 IOUtils.writeSmallFile(dir, story.getSelector() + ".header.html",
5c056aad 162 html.exportHeader(story));
73785268
NR
163
164 IOUtils.writeSmallFile(dir, story.getSelector(),
5c056aad 165 gopher.export(story));
73785268 166 IOUtils.writeSmallFile(dir, story.getSelector() + ".html",
5c056aad 167 html.export(story));
73785268
NR
168 }
169
5c056aad 170 // Finding headers of all stories in cache:
73785268
NR
171 File varDir = new File(dir, support.getSelector());
172 String[] headers = varDir.list(new FilenameFilter() {
173 @Override
174 public boolean accept(File dir, String name) {
175 return name.endsWith(".header");
176 }
177 });
178
e402343e
NR
179 // Reverse sort:
180 Arrays.sort(headers);
181 List<String> tmp = Arrays.asList(headers);
182 Collections.reverse(tmp);
183 headers = tmp.toArray(new String[] {});
184 //
185
186 // Write the index (with "MORE" links if needed)
187 int page = 0;
188 List<String> gopherLines = new ArrayList<String>();
189 List<String> htmlLines = new ArrayList<String>();
190 for (i = 0; i < headers.length; i++) {
191 gopherLines
192 .add(IOUtils.readSmallFile(new File(varDir, headers[i])));
193 htmlLines.add(IOUtils.readSmallFile(new File(varDir, headers[i]
194 + ".html")));
195
196 boolean enoughStories = (i > 0 && i % maxStories == 0);
197 boolean last = i == headers.length - 1;
198 if (enoughStories || last) {
199 if (!last) {
411d3399 200 gopherLines.add(getLink("More", support.getSelector()
93e09a08 201 + "gophermap_" + (page + 1), false, false));
e402343e 202 htmlLines.add(getLink("More", "index_" + (page + 1)
588b54b8 203 + ".html", false, true));
e402343e
NR
204 }
205
93e09a08 206 write(gopherLines, varDir, "gophermap", "", page);
e402343e
NR
207 write(htmlLines, varDir, "index", ".html", page);
208 gopherLines = new ArrayList<String>();
209 htmlLines = new ArrayList<String>();
210 page++;
5c056aad
NR
211 }
212 }
e402343e 213 }
5c056aad 214
e402343e
NR
215 private void write(List<String> lines, File varDir, String basename,
216 String ext, int page) throws IOException {
217 File file = new File(varDir, basename + (page > 0 ? "_" + page : "")
218 + ext);
219
220 FileWriter writer = new FileWriter(file);
5c056aad 221 try {
e402343e
NR
222 for (String line : lines) {
223 writer.append(line).append("\r\n");
5c056aad
NR
224 }
225 } finally {
226 writer.close();
227 }
e402343e 228 }
5c056aad 229
588b54b8 230 private String getLink(String name, String ref, boolean index, boolean html) {
e402343e 231 if (!html) {
dd8ad646 232 return new StringBuilder().append("1" + name).append("\t")
93e09a08 233 .append(ref) //
e402343e
NR
234 .append("\t").append(hostname) //
235 .append("\t").append(Integer.toString(port)) //
236 .append("\r\n").toString();
73785268 237 }
e402343e
NR
238
239 return new StringBuilder().append(
240 "<div class='site'><a href='" + ref + "'>" + name
241 + "</a></div>\n").toString();
73785268
NR
242 }
243}