src/be/nikiroo/gofetch/Fetcher.java

   1 package be.nikiroo.gofetch;
   2
   3 import java.io.File;
   4 import java.io.FileWriter;
   5 import java.io.FilenameFilter;
   6 import java.io.IOException;
   7 import java.util.ArrayList;
   8 import java.util.Arrays;
   9 import java.util.Collections;
  10 import java.util.List;
  11
  12 import be.nikiroo.gofetch.data.Story;
  13 import be.nikiroo.gofetch.output.Gopher;
  14 import be.nikiroo.gofetch.output.Html;
  15 import be.nikiroo.gofetch.output.Output;
  16 import be.nikiroo.gofetch.support.BasicSupport;
  17 import be.nikiroo.gofetch.support.Type;
  18 import be.nikiroo.utils.IOUtils;
  19
  20 /**
  21  * The class that will manage the fetch operations.
  22  * <p>
  23  * It will scrap the required websites and process them to disk.
  24  *
  25  * @author niki
  26  */
  27 public class Fetcher {
  28         private File dir;
  29         private String preselector;
  30         private int maxStories;
  31         private String hostname;
  32         private int port;
  33         private Type type;
  34
  35         /**
  36          * Prepare a new {@link Fetcher}.
  37          *
  38          * @param dir
  39          *            the target directory where to save the files (won't have
  40          *            impact on the files' content)
  41          * @param preselector
  42          *            the sub directory and (pre-)selector to use for the resources
  43          *            (<b>will</b> have an impact on the files' content)
  44          * @param type
  45          *            the type of news to get (or NULL to get all of the supported
  46          *            sources)
  47          * @param maxStories
  48          *            the maximum number of stories to show on the resume page
  49          * @param hostname
  50          *            the gopher host to use (<b>will</b> have an impact on the
  51          *            files' content)
  52          * @param port
  53          *            the gopher port to use (<b>will</b> have an impact on the
  54          *            files' content)
  55          */
  56         public Fetcher(File dir, String preselector, Type type, int maxStories,
  57                         String hostname, int port) {
  58                 this.dir = dir;
  59                 this.preselector = preselector;
  60                 this.type = type;
  61                 this.maxStories = maxStories;
  62                 this.hostname = hostname;
  63                 this.port = port;
  64         }
  65
  66         /**
  67          * Start the fetching operation.
  68          * <p>
  69          * This method will handle the main pages itself, and will call
  70          * {@link Fetcher#list(BasicSupport)} for the stories.
  71          *
  72          * @throws IOException
  73          *             in case of I/O error
  74          */
  75         public void start() throws IOException {
  76                 StringBuilder gopherBuilder = new StringBuilder();
  77                 StringBuilder htmlBuilder = new StringBuilder();
  78
  79                 BasicSupport.setPreselector(preselector);
  80                 for (Type type : Type.values()) {
  81                         BasicSupport support = BasicSupport.getSupport(type);
  82
  83                         if (type == this.type || this.type == null) {
  84                                 try {
  85                                         list(support);
  86                                 } catch (Exception e) {
  87                                         new Exception("Failed to process support: " + type, e)
  88                                                         .printStackTrace();
  89                                 }
  90                         }
  91
  92                         gopherBuilder.append(getLink(support.getDescription(),
  93                                         support.getSelector(), true, false));
  94
  95                         String ref = support.getSelector();
  96                         while (ref.startsWith("/")) {
  97                                 ref = ref.substring(1);
  98                         }
  99                         ref = "../" + ref + "/index.html";
 100
 101                         htmlBuilder.append(getLink(support.getDescription(), ref, true,
 102                                         true));
 103                 }
 104
 105                 File gopherCache = new File(dir, preselector);
 106                 gopherCache.mkdirs();
 107                 File htmlIndex = new File(gopherCache, "index.html");
 108                 gopherCache = new File(gopherCache, "gophermap");
 109
 110                 Output gopher = new Gopher(null, hostname, preselector, port);
 111                 Output html = new Html(null, hostname, preselector, port);
 112
 113                 FileWriter writer = new FileWriter(gopherCache);
 114                 try {
 115                         writer.append(gopher.getMainIndexHeader());
 116                         writer.append(gopherBuilder.toString());
 117                         writer.append(gopher.getMainIndexFooter());
 118                 } finally {
 119                         writer.close();
 120                 }
 121
 122                 try {
 123                         writer = new FileWriter(htmlIndex);
 124                         writer.append(html.getMainIndexHeader());
 125                         writer.append(htmlBuilder.toString());
 126                         writer.append(html.getMainIndexFooter());
 127                 } finally {
 128                         writer.close();
 129                 }
 130         }
 131
 132         /**
 133          * Process the stories for the given {@link BasicSupport} to disk.
 134          *
 135          * @param support
 136          *            the {@link BasicSupport} to download from
 137          *
 138          * @throws IOException
 139          *             in case of I/O error
 140          **/
 141         private void list(BasicSupport support) throws IOException {
 142                 // Get stories:
 143                 System.err
 144                                 .print("Listing recent news for " + support.getType() + "...");
 145                 List<Story> stories = support.list();
 146                 System.err.println(" " + stories.size() + " stories found!");
 147
 148                 // Get comments (and update stories if needed):
 149                 int i = 1;
 150                 List<Story> fetchedStories = new ArrayList<Story>(stories.size());
 151                 for (Story story : stories) {
 152                         System.err.print(String.format("%02d/%02d", i, stories.size())
 153                                         + " Fetching full story " + story.getId() + "...");
 154                         try {
 155                                 support.fetch(story);
 156                                 fetchedStories.add(story);
 157                                 System.err.println();
 158                         } catch (IOException e) {
 159 e.printStackTrace();
 160                                 System.err.println(" Failed to get story!");
 161                         }
 162                         i++;
 163                 }
 164                 stories = fetchedStories;
 165
 166                 Output gopher = new Gopher(support.getType(), hostname, preselector,
 167                                 port);
 168                 Output html = new Html(support.getType(), hostname, preselector, port);
 169
 170                 new File(dir, support.getSelector()).mkdirs();
 171
 172                 for (Story story : stories) {
 173                         IOUtils.writeSmallFile(dir, story.getSelector() + ".header",
 174                                         gopher.exportHeader(story));
 175                         IOUtils.writeSmallFile(dir, story.getSelector() + ".header.html",
 176                                         html.exportHeader(story));
 177
 178                         IOUtils.writeSmallFile(dir, story.getSelector(),
 179                                         gopher.export(story));
 180                         IOUtils.writeSmallFile(dir, story.getSelector() + ".html",
 181                                         html.export(story));
 182                 }
 183
 184                 // Finding headers of all stories in cache:
 185                 File varDir = new File(dir, support.getSelector());
 186                 String[] headers = varDir.list(new FilenameFilter() {
 187                         @Override
 188                         public boolean accept(File dir, String name) {
 189                                 return name.endsWith(".header");
 190                         }
 191                 });
 192
 193                 // Reverse sort:
 194                 Arrays.sort(headers);
 195                 List<String> tmp = Arrays.asList(headers);
 196                 Collections.reverse(tmp);
 197                 headers = tmp.toArray(new String[] {});
 198                 //
 199
 200                 // Write the main index (with "MORE" links if needed)
 201                 int page = 0;
 202                 List<String> gopherLines = new ArrayList<String>();
 203                 List<String> htmlLines = new ArrayList<String>();
 204                 gopherLines.add(gopher.getIndexHeader(support));
 205                 htmlLines.add(html.getIndexHeader(support));
 206                 for (i = 0; i < headers.length; i++) {
 207                         File gopherFile = new File(varDir, headers[i]);
 208                         File htmlFile = new File(varDir, headers[i] + ".html");
 209
 210                         if (gopherFile.exists())
 211                                 gopherLines.add(IOUtils.readSmallFile(gopherFile));
 212                         if (htmlFile.exists())
 213                                 htmlLines.add(IOUtils.readSmallFile(htmlFile));
 214
 215                         boolean enoughStories = (i > 0 && i % maxStories == 0);
 216                         boolean last = i == headers.length - 1;
 217                         if (enoughStories || last) {
 218                                 if (!last) {
 219                                         gopherLines.add(getLink("More", support.getSelector()
 220                                                         + "gophermap_" + (page + 1), true, false));
 221
 222                                         htmlLines.add(getLink("More", "index_" + (page + 1)
 223                                                         + ".html", true, true));
 224                                 }
 225
 226                                 gopherLines.add(gopher.getIndexFooter(support));
 227                                 htmlLines.add(html.getIndexFooter(support));
 228                                 write(gopherLines, varDir, "gophermap", "", page);
 229                                 write(htmlLines, varDir, "index", ".html", page);
 230                                 gopherLines = new ArrayList<String>();
 231                                 htmlLines = new ArrayList<String>();
 232                                 page++;
 233                         }
 234                 }
 235         }
 236
 237         /**
 238          * Write an index/gophermap file with the given link content for the
 239          * selected supported web site.
 240          *
 241          * @param lines
 242          *            the link content (the stories and a short description)
 243          * @param varDir
 244          *            the base directory to write into
 245          * @param basename
 246          *            the base file name
 247          * @param ext
 248          *            the file extension (for instance, ".html")
 249          * @param page
 250          *            the page number (0 = main index)
 251          *
 252          * @throws IOException
 253          *             in case of I/O errors
 254          */
 255         private void write(List<String> lines, File varDir, String basename,
 256                         String ext, int page) throws IOException {
 257                 File file = new File(varDir, basename + (page > 0 ? "_" + page : "")
 258                                 + ext);
 259
 260                 FileWriter writer = new FileWriter(file);
 261                 try {
 262                         for (String line : lines) {
 263                                 writer.append(line).append("\r\n");
 264                         }
 265                 } finally {
 266                         writer.close();
 267                 }
 268         }
 269
 270         /**
 271          * Create a link.
 272          *
 273          * @param name
 274          *            the link name (what the user will see)
 275          * @param ref
 276          *            the actual link reference (the target)
 277          * @param menu
 278          *            menu (gophermap, i) mode -- not used in html mode
 279          * @param html
 280          *            TRUE for html mode, FALSE for gopher mode
 281          *
 282          * @return the ready-to-use link in a {@link String}
 283          */
 284         private String getLink(String name, String ref, boolean menu, boolean html) {
 285                 if (!html) {
 286                         return new StringBuilder().append((menu ? "1" : "0") + name)
 287                                         .append("\t").append(ref) //
 288                                         .append("\t").append(hostname) //
 289                                         .append("\t").append(Integer.toString(port)) //
 290                                         .append("\r\n").toString();
 291                 }
 292
 293                 return new StringBuilder().append(
 294                                 "<div class='site'><a href='" + ref + "'>" + name
 295                                                 + "</a></div>\n").toString();
 296         }
 297 }