[gofetch.git] / src / be / nikiroo / gofetch / Fetcher.java

package be.nikiroo.gofetch;

import java.io.File;
import java.io.FileWriter;
import java.io.FilenameFilter;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;

import be.nikiroo.gofetch.data.Comment;
import be.nikiroo.gofetch.data.Story;
import be.nikiroo.gofetch.output.Gopher;
import be.nikiroo.gofetch.output.Html;
import be.nikiroo.gofetch.output.Output;
import be.nikiroo.gofetch.support.BasicSupport;
import be.nikiroo.gofetch.support.BasicSupport.Type;
import be.nikiroo.utils.IOUtils;

/**
 * The class that will manage the fetch operations.
 * <p>
 * It will scrape the required websites and process them to disk.
 * 
 * @author niki
 */
public class Fetcher {
	private final File dir;
	private final String preselector;
	private final int maxStories;
	private final String hostname;
	private final int port;
	private final Type type;

	/**
	 * Prepare a new {@link Fetcher}.
	 * 
	 * @param dir
	 *            the target directory where to save the files (won't have
	 *            impact on the files' content)
	 * @param preselector
	 *            the sub directory and (pre-)selector to use for the resources
	 *            (<b>will</b> have an impact on the files' content)
	 * @param type
	 *            the type of news to get, or NULL to get all of the supported
	 *            sources (every value of {@link Type})
	 * @param maxStories
	 *            the maximum number of stories to show on the summary (index)
	 *            page of each source
	 * @param hostname
	 *            the gopher host to use (<b>will</b> have an impact on the
	 *            files' content)
	 * @param port
	 *            the gopher port to use (<b>will</b> have an impact on the
	 *            files' content)
	 */
	public Fetcher(File dir, String preselector, Type type, int maxStories,
			String hostname, int port) {
		this.dir = dir;
		this.preselector = preselector;
		this.type = type;
		this.maxStories = maxStories;
		this.hostname = hostname;
		this.port = port;
	}

	/**
	 * Start the fetching operation.
	 * <p>
	 * This method will handle the main pages itself (the gopher
	 * <tt>.cache</tt> menu and the <tt>index.html</tt> page written in the
	 * preselector directory), and will call
	 * {@link Fetcher#list(BasicSupport)} for the stories of each requested
	 * source.
	 * 
	 * @throws IOException
	 *             in case of I/O error
	 */
	public void start() throws IOException {
		File indexDir = new File(dir, preselector);
		// No need to check the result: if the directory is missing, opening
		// the writers below fails with an IOException
		indexDir.mkdirs();
		File cache = new File(indexDir, ".cache");
		File cacheHtml = new File(indexDir, "index.html");

		Output gopher = new Gopher(null, hostname, port);
		Output html = new Html(null);

		FileWriter writer = new FileWriter(cache);
		try {
			FileWriter writerHtml = new FileWriter(cacheHtml);
			try {
				writer.append(gopher.getIndexHeader());
				writerHtml.append(html.getIndexHeader());

				// NULL means "all the supported sources"
				Type[] types;
				if (type == null) {
					types = Type.values();
				} else {
					types = new Type[] { type };
				}

				BasicSupport.setPreselector(preselector);
				for (Type current : types) {
					BasicSupport support = BasicSupport.getSupport(current);
					list(support);

					String description = support.getDescription();
					String selector = support.getSelector();

					// One gopher menu line per source:
					// <display> TAB <selector> TAB <host> TAB <port> CRLF
					writer.append("1" + description).append("\t")
							.append("1" + selector) //
							.append("\t").append(hostname) //
							.append("\t").append(Integer.toString(port)) //
							.append("\r\n");

					// The HTML link is relative: drop the leading slashes
					String ref = selector;
					while (ref.startsWith("/")) {
						ref = ref.substring(1);
					}
					writerHtml.append("<div class='site'><a href='../" + ref
							+ "'>" + description + "</a></div>");
				}

				writer.append(gopher.getIndexFooter());
				writerHtml.append(html.getIndexFooter());
			} finally {
				writerHtml.close();
			}
		} finally {
			writer.close();
		}
	}

	/**
	 * Process the stories for the given {@link BasicSupport} to disk.
	 * <p>
	 * For each story, 4 files are written under the target directory: the
	 * gopher and HTML headers (<tt>.header</tt> and <tt>.header.html</tt>) and
	 * the gopher and HTML full versions with comments (no extension and
	 * <tt>.html</tt>).
	 * <p>
	 * The <tt>.cache</tt> and <tt>index.html</tt> files of the source
	 * directory are then rebuilt from the <tt>.header</tt> files found in that
	 * directory (all of them, not only those written by this call): the last
	 * <tt>maxStories</tt> ones in file name order, last one first.
	 * 
	 * @param support
	 *            the {@link BasicSupport} to download from
	 * 
	 * @throws IOException
	 *             in case of I/O error, including when the source directory
	 *             cannot be listed
	 **/
	private void list(BasicSupport support) throws IOException {
		Output gopher = new Gopher(support.getType(), hostname, port);
		Output html = new Html(support.getType());

		File varDir = new File(dir, support.getSelector());
		varDir.mkdirs();

		System.err
				.print("Listing recent news for " + support.getType() + "...");
		List<Story> stories = support.list();
		System.err.println(" " + stories.size() + " stories found!");

		int num = 1;
		for (Story story : stories) {
			// Headers first, so they exist even if the comments cannot be
			// fetched
			IOUtils.writeSmallFile(dir, story.getSelector() + ".header",
					gopher.export(story));
			IOUtils.writeSmallFile(dir, story.getSelector() + ".header.html",
					html.export(story));

			System.err.println(String.format("%02d/%02d", num, stories.size())
					+ " Fetching comments for story " + story.getId() + "...");
			List<Comment> comments = support.getComments(story);

			IOUtils.writeSmallFile(dir, story.getSelector(),
					gopher.export(story, comments));
			IOUtils.writeSmallFile(dir, story.getSelector() + ".html",
					html.export(story, comments));

			num++;
		}

		// Only the gopher headers are listed (".header.html" does not match),
		// the HTML ones are derived from their names
		String[] headers = varDir.list(new FilenameFilter() {
			@Override
			public boolean accept(File parent, String name) {
				return name.endsWith(".header");
			}
		});

		// File.list() returns NULL if the path is not a directory or on I/O
		// error: report it as an IOException instead of a later NPE
		if (headers == null) {
			throw new IOException("Cannot list the content of directory: "
					+ varDir);
		}

		File cache = new File(varDir, ".cache");
		File cacheHtml = new File(varDir, "index.html");
		FileWriter writer = new FileWriter(cache);
		try {
			FileWriter writerHtml = new FileWriter(cacheHtml);
			try {
				if (headers.length > 0) {
					Arrays.sort(headers);

					// Keep at most maxStories entries, starting from the end
					// of the sorted list
					int from = headers.length - 1;
					int to = headers.length - maxStories;
					if (to < 0) {
						to = 0;
					}

					for (int i = from; i >= to; i--) {
						writer.append(IOUtils.readSmallFile(new File(varDir,
								headers[i])));

						writerHtml.append(IOUtils.readSmallFile(new File(
								varDir, headers[i] + ".html")));
					}
				}
			} finally {
				writerHtml.close();
			}
		} finally {
			writer.close();
		}
	}
}
Commit	Line	Data
73785268 NR	1	package be.nikiroo.gofetch;
	2
	3	import java.io.File;
	4	import java.io.FileWriter;
	5	import java.io.FilenameFilter;
	6	import java.io.IOException;
	7	import java.util.Arrays;
	8	import java.util.List;
	9
	10	import be.nikiroo.gofetch.data.Comment;
	11	import be.nikiroo.gofetch.data.Story;
	12	import be.nikiroo.gofetch.output.Gopher;
	13	import be.nikiroo.gofetch.output.Html;
	14	import be.nikiroo.gofetch.output.Output;
	15	import be.nikiroo.gofetch.support.BasicSupport;
	16	import be.nikiroo.gofetch.support.BasicSupport.Type;
	17	import be.nikiroo.utils.IOUtils;
	18
	19	/**
	20	* The class that will manage the fetch operations.
	21	* <p>
	22	* It will scrap the required websites and process them to disk.
	23	*
	24	* @author niki
	25	*/
	26	public class Fetcher {
	27	private File dir;
	28	private String preselector;
	29	private int maxStories;
	30	private String hostname;
	31	private int port;
	32	private Type type;
	33
	34	/**
	35	* Prepare a new {@link Fetcher}.
	36	*
	37	* @param dir
	38	* the target directory where to save the files (won't have
	39	* impact on the files' content)
	40	* @param preselector
	41	* the sub directory and (pre-)selector to use for the resources
	42	* (<b>will</b> have an impact on the files' content)
	43	* @param type
	44	* the type of news to get (or the special keyword ALL to get all
	45	* of the supported sources)
	46	* @param maxStories
	47	* the maximum number of stories to show on the resume page
	48	* @param hostname
	49	* the gopher host to use (<b>will</b> have an impact on the
	50	* files' content)
	51	* @param port
	52	* the gopher port to use (<b>will</b> have an impact on the
	53	* files' content)
	54	*/
	55	public Fetcher(File dir, String preselector, Type type, int maxStories,
	56	String hostname, int port) {
	57	this.dir = dir;
	58	this.preselector = preselector;
	59	this.type = type;
	60	this.maxStories = maxStories;
	61	this.hostname = hostname;
	62	this.port = port;
	63	}
	64
65	/**
66	* Start the fetching operation.
67	* <p>
68	* This method will handle the main pages itself, and will call
69	* {@link Fetcher#list(BasicSupport)} for the stories.
70	*
71	* @throws IOException
72	* in case of I/O error
73	*/
74	public void start() throws IOException {
75	File cache = new File(dir, preselector);
76	cache.mkdirs();
77	File cacheHtml = new File(cache, "index.html");
78	cache = new File(cache, ".cache");
79
80	Output gopher = new Gopher(null, hostname, port);
81	Output html = new Html(null);
82
83	FileWriter writer = new FileWriter(cache);
84	try {
85	FileWriter writerHtml = new FileWriter(cacheHtml);
86	try {
87	writer.append(gopher.getIndexHeader());
88	writerHtml.append(html.getIndexHeader());
89
90	Type types[];
91	if (type == null) {
92	types = Type.values();
93	} else {
94	types = new Type[] { type };
95	}
96
97	BasicSupport.setPreselector(preselector);
98	for (Type type : types) {
99	BasicSupport support = BasicSupport.getSupport(type);
100	list(support);
101
102	writer.append("1" + support.getDescription()).append("\t")
103	.append("1" + support.getSelector()) //
104	.append("\t").append(hostname) //
105	.append("\t").append(Integer.toString(port)) //
106	.append("\r\n");
107	String ref = support.getSelector();
108	while (ref.startsWith("/")) {
109	ref = ref.substring(1);
110	}
111	writerHtml.append("<div class='site'><a href='../" + ref
112	+ "'>" + support.getDescription() + "</a></div>");
113	}
114
115	writer.append(gopher.getIndexFooter());
116	writerHtml.append(html.getIndexFooter());
117	} finally {
118	writerHtml.close();
119	}
120	} finally {
121	writer.close();
122	}
123	}
124
125	/**
126	* Process the stories for the given {@link BasicSupport} to disk.
127	*
128	* @param support
129	* the {@link BasicSupport} to download from
130	*
131	* @throws IOException
132	* in case of I/O error
133	**/
134	private void list(BasicSupport support) throws IOException {
135	Output gopher = new Gopher(support.getType(), hostname, port);
136	Output html = new Html(support.getType());
137
138	new File(dir, support.getSelector()).mkdirs();
139
140	System.err
141	.print("Listing recent news for " + support.getType() + "...");
142	List<Story> stories = support.list();
143	System.err.println(" " + stories.size() + " stories found!");
144	int i = 1;
145	for (Story story : stories) {
146	IOUtils.writeSmallFile(dir, story.getSelector() + ".header",
147	gopher.export(story));
148	IOUtils.writeSmallFile(dir, story.getSelector() + ".header.html",
149	html.export(story));
150
151	System.err.println(String.format("%02d/%02d", i, stories.size())
152	+ " Fetching comments for story " + story.getId() + "...");
153	List<Comment> comments = support.getComments(story);
154
155	IOUtils.writeSmallFile(dir, story.getSelector(),
156	gopher.export(story, comments));
157	IOUtils.writeSmallFile(dir, story.getSelector() + ".html",
158	html.export(story, comments));
159
160	i++;
161	}
162
163	File varDir = new File(dir, support.getSelector());
164	String[] headers = varDir.list(new FilenameFilter() {
165	@Override
166	public boolean accept(File dir, String name) {
167	return name.endsWith(".header");
168	}
169	});
170
171	File cache = new File(varDir, ".cache");
172	File cacheHtml = new File(varDir, "index.html");
173	FileWriter writer = new FileWriter(cache);
174	try {
175	FileWriter writerHtml = new FileWriter(cacheHtml);
176	try {
177	if (headers.length > 0) {
178	Arrays.sort(headers);
179	int from = headers.length - 1;
180	int to = headers.length - maxStories;
181	if (to < 0) {
182	to = 0;
183	}
184	for (i = from; i >= to; i--) {
185	writer.append(IOUtils.readSmallFile(new File(varDir,
186	headers[i])));
187
188	writerHtml.append(IOUtils.readSmallFile(new File(
189	varDir, headers[i] + ".html")));
190	}
191	}
192	} finally {
193	writerHtml.close();
194	}
195	} finally {
196	writer.close();
197	}
198	}
199	}