Fix resume/not resume issue for Gopher cache
[gofetch.git] / src / be / nikiroo / gofetch / Fetcher.java
CommitLineData
73785268
NR
1package be.nikiroo.gofetch;
2
3import java.io.File;
4import java.io.FileWriter;
5import java.io.FilenameFilter;
6import java.io.IOException;
7import java.util.Arrays;
8import java.util.List;
9
73785268
NR
10import be.nikiroo.gofetch.data.Story;
11import be.nikiroo.gofetch.output.Gopher;
12import be.nikiroo.gofetch.output.Html;
13import be.nikiroo.gofetch.output.Output;
14import be.nikiroo.gofetch.support.BasicSupport;
15import be.nikiroo.gofetch.support.BasicSupport.Type;
16import be.nikiroo.utils.IOUtils;
17
18/**
19 * The class that will manage the fetch operations.
20 * <p>
21 * It will scrap the required websites and process them to disk.
22 *
23 * @author niki
24 */
25public class Fetcher {
26 private File dir;
27 private String preselector;
28 private int maxStories;
29 private String hostname;
30 private int port;
31 private Type type;
32
33 /**
34 * Prepare a new {@link Fetcher}.
35 *
36 * @param dir
37 * the target directory where to save the files (won't have
38 * impact on the files' content)
39 * @param preselector
40 * the sub directory and (pre-)selector to use for the resources
41 * (<b>will</b> have an impact on the files' content)
42 * @param type
43 * the type of news to get (or the special keyword ALL to get all
44 * of the supported sources)
45 * @param maxStories
46 * the maximum number of stories to show on the resume page
47 * @param hostname
48 * the gopher host to use (<b>will</b> have an impact on the
49 * files' content)
50 * @param port
51 * the gopher port to use (<b>will</b> have an impact on the
52 * files' content)
53 */
54 public Fetcher(File dir, String preselector, Type type, int maxStories,
55 String hostname, int port) {
56 this.dir = dir;
57 this.preselector = preselector;
58 this.type = type;
59 this.maxStories = maxStories;
60 this.hostname = hostname;
61 this.port = port;
62 }
63
64 /**
65 * Start the fetching operation.
66 * <p>
67 * This method will handle the main pages itself, and will call
68 * {@link Fetcher#list(BasicSupport)} for the stories.
69 *
70 * @throws IOException
71 * in case of I/O error
72 */
73 public void start() throws IOException {
5c056aad
NR
74 StringBuilder gopherBuilder = new StringBuilder();
75 StringBuilder htmlBuilder = new StringBuilder();
76
77 BasicSupport.setPreselector(preselector);
78 for (Type type : Type.values()) {
79 BasicSupport support = BasicSupport.getSupport(type);
80
81 if (type == this.type || this.type == null) {
82 list(support);
83 }
84
85 gopherBuilder.append("1" + support.getDescription()).append("\t")
86 .append("1" + support.getSelector()) //
87 .append("\t").append(hostname) //
88 .append("\t").append(Integer.toString(port)) //
89 .append("\r\n");
90
91 String ref = support.getSelector();
92 while (ref.startsWith("/")) {
93 ref = ref.substring(1);
94 }
95 htmlBuilder.append("<div class='site'><a href='../" + ref + "'>"
96 + support.getDescription() + "</a></div>\n");
97 }
98
99 File gopherCache = new File(dir, preselector);
100 gopherCache.mkdirs();
101 File htmlIndex = new File(gopherCache, "index.html");
102 gopherCache = new File(gopherCache, ".cache");
73785268 103
70b18499
NR
104 Output gopher = new Gopher(null, hostname, preselector, port);
105 Output html = new Html(null, hostname, preselector, port);
73785268 106
5c056aad 107 FileWriter writer = new FileWriter(gopherCache);
73785268 108 try {
5c056aad
NR
109 writer.append(gopher.getIndexHeader());
110 writer.append(gopherBuilder.toString());
111 writer.append(gopher.getIndexFooter());
112 } finally {
113 writer.close();
114 }
115
116 try {
117 writer = new FileWriter(htmlIndex);
118 writer.append(html.getIndexHeader());
119 writer.append(htmlBuilder.toString());
120 writer.append(html.getIndexFooter());
73785268
NR
121 } finally {
122 writer.close();
123 }
124 }
125
126 /**
127 * Process the stories for the given {@link BasicSupport} to disk.
128 *
129 * @param support
130 * the {@link BasicSupport} to download from
131 *
132 * @throws IOException
133 * in case of I/O error
134 **/
135 private void list(BasicSupport support) throws IOException {
5c056aad
NR
136 // Get stories:
137 System.err
138 .print("Listing recent news for " + support.getType() + "...");
139 List<Story> stories = support.list();
140 System.err.println(" " + stories.size() + " stories found!");
141
142 // Get comments (and update stories if needed):
143 int i = 1;
144 for (Story story : stories) {
145 System.err.println(String.format("%02d/%02d", i, stories.size())
146 + " Fetching full story " + story.getId() + "...");
147 support.fetch(story);
148 i++;
149 }
150
70b18499
NR
151 Output gopher = new Gopher(support.getType(), hostname, preselector,
152 port);
153 Output html = new Html(support.getType(), hostname, preselector, port);
73785268
NR
154
155 new File(dir, support.getSelector()).mkdirs();
156
73785268
NR
157 for (Story story : stories) {
158 IOUtils.writeSmallFile(dir, story.getSelector() + ".header",
5c056aad 159 gopher.exportHeader(story));
73785268 160 IOUtils.writeSmallFile(dir, story.getSelector() + ".header.html",
5c056aad 161 html.exportHeader(story));
73785268
NR
162
163 IOUtils.writeSmallFile(dir, story.getSelector(),
5c056aad 164 gopher.export(story));
73785268 165 IOUtils.writeSmallFile(dir, story.getSelector() + ".html",
5c056aad 166 html.export(story));
73785268
NR
167 }
168
5c056aad 169 // Finding headers of all stories in cache:
73785268
NR
170 File varDir = new File(dir, support.getSelector());
171 String[] headers = varDir.list(new FilenameFilter() {
172 @Override
173 public boolean accept(File dir, String name) {
174 return name.endsWith(".header");
175 }
176 });
177
5c056aad
NR
178 // Finding which ones to show:
179 int from = 0;
180 int to = 0;
181 if (headers.length > 0) {
182 Arrays.sort(headers);
183 from = headers.length - 1;
184 to = headers.length - maxStories;
185 if (to < 0) {
186 to = 0;
187 }
188 }
189
190 // Writing the cache/index files with the stories:
191 File gopherCache = new File(varDir, ".cache");
192 FileWriter writer = new FileWriter(gopherCache);
193 try {
194 for (i = from; i >= to; i--) {
195 writer.append(IOUtils
196 .readSmallFile(new File(varDir, headers[i])));
197 }
198 } finally {
199 writer.close();
200 }
201
202 File htmlIndex = new File(varDir, "index.html");
203 writer = new FileWriter(htmlIndex);
73785268 204 try {
5c056aad
NR
205 for (i = from; i >= to; i--) {
206 writer.append(IOUtils.readSmallFile(new File(varDir, headers[i]
207 + ".html")));
73785268
NR
208 }
209 } finally {
210 writer.close();
211 }
212 }
213}