Fetcher.java

   1 package be.nikiroo.gofetch;
   2
   3 import java.io.File;
   4 import java.io.FileWriter;
   5 import java.io.FilenameFilter;
   6 import java.io.IOException;
   7 import java.util.ArrayList;
   8 import java.util.Arrays;
   9 import java.util.Collections;
  10 import java.util.List;
  11
  12 import be.nikiroo.gofetch.data.Story;
  13 import be.nikiroo.gofetch.output.Gopher;
  14 import be.nikiroo.gofetch.output.Html;
  15 import be.nikiroo.gofetch.output.Output;
  16 import be.nikiroo.gofetch.support.BasicSupport;
  17 import be.nikiroo.gofetch.support.BasicSupport.Type;
  18 import be.nikiroo.utils.IOUtils;
  19
  20 /**
  21  * The class that will manage the fetch operations.
  22  * <p>
  23  * It will scrap the required websites and process them to disk.
  24  *
  25  * @author niki
  26  */
  27 public class Fetcher {
  28         private File dir;
  29         private String preselector;
  30         private int maxStories;
  31         private String hostname;
  32         private int port;
  33         private Type type;
  34
  35         /**
  36          * Prepare a new {@link Fetcher}.
  37          *
  38          * @param dir
  39          *            the target directory where to save the files (won't have
  40          *            impact on the files' content)
  41          * @param preselector
  42          *            the sub directory and (pre-)selector to use for the resources
  43          *            (<b>will</b> have an impact on the files' content)
  44          * @param type
  45          *            the type of news to get (or the special keyword ALL to get all
  46          *            of the supported sources)
  47          * @param maxStories
  48          *            the maximum number of stories to show on the resume page
  49          * @param hostname
  50          *            the gopher host to use (<b>will</b> have an impact on the
  51          *            files' content)
  52          * @param port
  53          *            the gopher port to use (<b>will</b> have an impact on the
  54          *            files' content)
  55          */
  56         public Fetcher(File dir, String preselector, Type type, int maxStories,
  57                         String hostname, int port) {
  58                 this.dir = dir;
  59                 this.preselector = preselector;
  60                 this.type = type;
  61                 this.maxStories = maxStories;
  62                 this.hostname = hostname;
  63                 this.port = port;
  64         }
  65
  66         /**
  67          * Start the fetching operation.
  68          * <p>
  69          * This method will handle the main pages itself, and will call
  70          * {@link Fetcher#list(BasicSupport)} for the stories.
  71          *
  72          * @throws IOException
  73          *             in case of I/O error
  74          */
  75         public void start() throws IOException {
  76                 StringBuilder gopherBuilder = new StringBuilder();
  77                 StringBuilder htmlBuilder = new StringBuilder();
  78
  79                 BasicSupport.setPreselector(preselector);
  80                 for (Type type : Type.values()) {
  81                         BasicSupport support = BasicSupport.getSupport(type);
  82
  83                         if (type == this.type || this.type == null) {
  84                                 list(support);
  85                         }
  86
  87                         gopherBuilder.append(getLink(support.getDescription(),
  88                                         support.getSelector(), true, false));
  89
  90                         String ref = support.getSelector();
  91                         while (ref.startsWith("/")) {
  92                                 ref = ref.substring(1);
  93                         }
  94                         ref = "../" + ref + "/index.html";
  95
  96                         htmlBuilder.append(getLink(support.getDescription(),
  97                                         ref, true, true));
  98                 }
  99
 100                 File gopherCache = new File(dir, preselector);
 101                 gopherCache.mkdirs();
 102                 File htmlIndex = new File(gopherCache, "index.html");
 103                 gopherCache = new File(gopherCache, "gophermap");
 104
 105                 Output gopher = new Gopher(null, hostname, preselector, port);
 106                 Output html = new Html(null, hostname, preselector, port);
 107
 108                 FileWriter writer = new FileWriter(gopherCache);
 109                 try {
 110                         writer.append(gopher.getIndexHeader());
 111                         writer.append(gopherBuilder.toString());
 112                         writer.append(gopher.getIndexFooter());
 113                 } finally {
 114                         writer.close();
 115                 }
 116
 117                 try {
 118                         writer = new FileWriter(htmlIndex);
 119                         writer.append(html.getIndexHeader());
 120                         writer.append(htmlBuilder.toString());
 121                         writer.append(html.getIndexFooter());
 122                 } finally {
 123                         writer.close();
 124                 }
 125         }
 126
 127         /**
 128          * Process the stories for the given {@link BasicSupport} to disk.
 129          *
 130          * @param support
 131          *            the {@link BasicSupport} to download from
 132          *
 133          * @throws IOException
 134          *             in case of I/O error
 135          **/
 136         private void list(BasicSupport support) throws IOException {
 137                 // Get stories:
 138                 System.err
 139                                 .print("Listing recent news for " + support.getType() + "...");
 140                 List<Story> stories = support.list();
 141                 System.err.println(" " + stories.size() + " stories found!");
 142
 143                 // Get comments (and update stories if needed):
 144                 int i = 1;
 145                 for (Story story : stories) {
 146                         System.err.println(String.format("%02d/%02d", i, stories.size())
 147                                         + " Fetching full story " + story.getId() + "...");
 148                         support.fetch(story);
 149                         i++;
 150                 }
 151
 152                 Output gopher = new Gopher(support.getType(), hostname, preselector,
 153                                 port);
 154                 Output html = new Html(support.getType(), hostname, preselector, port);
 155
 156                 new File(dir, support.getSelector()).mkdirs();
 157
 158                 for (Story story : stories) {
 159                         IOUtils.writeSmallFile(dir, story.getSelector() + ".header",
 160                                         gopher.exportHeader(story));
 161                         IOUtils.writeSmallFile(dir, story.getSelector() + ".header.html",
 162                                         html.exportHeader(story));
 163
 164                         IOUtils.writeSmallFile(dir, story.getSelector(),
 165                                         gopher.export(story));
 166                         IOUtils.writeSmallFile(dir, story.getSelector() + ".html",
 167                                         html.export(story));
 168                 }
 169
 170                 // Finding headers of all stories in cache:
 171                 File varDir = new File(dir, support.getSelector());
 172                 String[] headers = varDir.list(new FilenameFilter() {
 173                         @Override
 174                         public boolean accept(File dir, String name) {
 175                                 return name.endsWith(".header");
 176                         }
 177                 });
 178
 179                 // Reverse sort:
 180                 Arrays.sort(headers);
 181                 List<String> tmp = Arrays.asList(headers);
 182                 Collections.reverse(tmp);
 183                 headers = tmp.toArray(new String[] {});
 184                 //
 185
 186                 // Write the index (with "MORE" links if needed)
 187                 int page = 0;
 188                 List<String> gopherLines = new ArrayList<String>();
 189                 List<String> htmlLines = new ArrayList<String>();
 190                 for (i = 0; i < headers.length; i++) {
 191                         gopherLines
 192                                         .add(IOUtils.readSmallFile(new File(varDir, headers[i])));
 193                         htmlLines.add(IOUtils.readSmallFile(new File(varDir, headers[i]
 194                                         + ".html")));
 195
 196                         boolean enoughStories = (i > 0 && i % maxStories == 0);
 197                         boolean last = i == headers.length - 1;
 198                         if (enoughStories || last) {
 199                                 if (!last) {
 200                                         gopherLines.add(getLink("More",
 201                                                 support.getSelector()
 202                                                         + "gophermap_"
 203                                                         + (page + 1),
 204                                                 true,
 205                                                 false));
 206
 207                                         htmlLines.add(getLink("More",
 208                                                 "index_"
 209                                                         + (page + 1)
 210                                                         + ".html",
 211                                                 true,
 212                                                 true));
 213                                 }
 214
 215                                 write(gopherLines, varDir, "gophermap", "", page);
 216                                 write(htmlLines, varDir, "index", ".html", page);
 217                                 gopherLines = new ArrayList<String>();
 218                                 htmlLines = new ArrayList<String>();
 219                                 page++;
 220                         }
 221                 }
 222         }
 223
 224         private void write(List<String> lines, File varDir, String basename,
 225                         String ext, int page) throws IOException {
 226                 File file = new File(varDir, basename + (page > 0 ? "_" + page : "")
 227                                 + ext);
 228
 229                 FileWriter writer = new FileWriter(file);
 230                 try {
 231                         for (String line : lines) {
 232                                 writer.append(line).append("\r\n");
 233                         }
 234                 } finally {
 235                         writer.close();
 236                 }
 237         }
 238
 239         /**
 240          *
 241          * @param name
 242          * @param ref
 243          * @param menu
 244          *            menu (gophermap, i) mode
 245          * @param html
 246          * @return
 247          */
 248         private String getLink(String name, String ref, boolean menu, boolean html) {
 249                 if (!html) {
 250                         return new StringBuilder().append((menu ? "1" : "0") + name)
 251                                         .append("\t").append(ref) //
 252                                         .append("\t").append(hostname) //
 253                                         .append("\t").append(Integer.toString(port)) //
 254                                         .append("\r\n").toString();
 255                 }
 256
 257                 return new StringBuilder().append(
 258                                 "<div class='site'><a href='" + ref + "'>" + name
 259                                                 + "</a></div>\n").toString();
 260         }
 261 }