From 5c056aade2e020276e039f81acba7bcb2b12e87f Mon Sep 17 00:00:00 2001 From: Niki Roo Date: Mon, 7 Aug 2017 19:17:35 +0200 Subject: [PATCH] Fix download order and comments/content storing --- src/be/nikiroo/gofetch/Fetcher.java | 165 ++++++++++-------- src/be/nikiroo/gofetch/data/Story.java | 37 ++++ src/be/nikiroo/gofetch/output/Gopher.java | 26 +-- src/be/nikiroo/gofetch/output/Html.java | 18 +- src/be/nikiroo/gofetch/output/Output.java | 11 +- .../nikiroo/gofetch/support/BasicSupport.java | 13 +- src/be/nikiroo/gofetch/support/LWN.java | 37 ++-- src/be/nikiroo/gofetch/support/Pipedot.java | 4 +- src/be/nikiroo/gofetch/support/Slashdot.java | 4 +- 9 files changed, 184 insertions(+), 131 deletions(-) diff --git a/src/be/nikiroo/gofetch/Fetcher.java b/src/be/nikiroo/gofetch/Fetcher.java index bf3af6e..725355c 100644 --- a/src/be/nikiroo/gofetch/Fetcher.java +++ b/src/be/nikiroo/gofetch/Fetcher.java @@ -7,7 +7,6 @@ import java.io.IOException; import java.util.Arrays; import java.util.List; -import be.nikiroo.gofetch.data.Comment; import be.nikiroo.gofetch.data.Story; import be.nikiroo.gofetch.output.Gopher; import be.nikiroo.gofetch.output.Html; @@ -72,47 +71,53 @@ public class Fetcher { * in case of I/O error */ public void start() throws IOException { - File cache = new File(dir, preselector); - cache.mkdirs(); - File cacheHtml = new File(cache, "index.html"); - cache = new File(cache, ".cache"); + StringBuilder gopherBuilder = new StringBuilder(); + StringBuilder htmlBuilder = new StringBuilder(); + + BasicSupport.setPreselector(preselector); + for (Type type : Type.values()) { + BasicSupport support = BasicSupport.getSupport(type); + + if (type == this.type || this.type == null) { + list(support); + } + + gopherBuilder.append("1" + support.getDescription()).append("\t") + .append("1" + support.getSelector()) // + .append("\t").append(hostname) // + .append("\t").append(Integer.toString(port)) // + .append("\r\n"); + + String ref = support.getSelector(); + while (ref.startsWith("/")) { + ref = ref.substring(1); + } + htmlBuilder.append("
" + + support.getDescription() + "
\n"); + } + + File gopherCache = new File(dir, preselector); + gopherCache.mkdirs(); + File htmlIndex = new File(gopherCache, "index.html"); + gopherCache = new File(gopherCache, ".cache"); Output gopher = new Gopher(null, hostname, preselector, port); Output html = new Html(null, hostname, preselector, port); - FileWriter writer = new FileWriter(cache); + FileWriter writer = new FileWriter(gopherCache); try { - FileWriter writerHtml = new FileWriter(cacheHtml); - try { - writer.append(gopher.getIndexHeader()); - writerHtml.append(html.getIndexHeader()); - - BasicSupport.setPreselector(preselector); - for (Type type : Type.values()) { - BasicSupport support = BasicSupport.getSupport(type); - - if (type == this.type || this.type == null) { - list(support); - } - - writer.append("1" + support.getDescription()).append("\t") - .append("1" + support.getSelector()) // - .append("\t").append(hostname) // - .append("\t").append(Integer.toString(port)) // - .append("\r\n"); - String ref = support.getSelector(); - while (ref.startsWith("/")) { - ref = ref.substring(1); - } - writerHtml.append("
" + support.getDescription() + "
\n"); - } - - writer.append(gopher.getIndexFooter()); - writerHtml.append(html.getIndexFooter()); - } finally { - writerHtml.close(); - } + writer.append(gopher.getIndexHeader()); + writer.append(gopherBuilder.toString()); + writer.append(gopher.getIndexFooter()); + } finally { + writer.close(); + } + + try { + writer = new FileWriter(htmlIndex); + writer.append(html.getIndexHeader()); + writer.append(htmlBuilder.toString()); + writer.append(html.getIndexFooter()); } finally { writer.close(); } @@ -128,35 +133,40 @@ public class Fetcher { * in case of I/O error **/ private void list(BasicSupport support) throws IOException { + // Get stories: + System.err + .print("Listing recent news for " + support.getType() + "..."); + List stories = support.list(); + System.err.println(" " + stories.size() + " stories found!"); + + // Get comments (and update stories if needed): + int i = 1; + for (Story story : stories) { + System.err.println(String.format("%02d/%02d", i, stories.size()) + + " Fetching full story " + story.getId() + "..."); + support.fetch(story); + i++; + } + Output gopher = new Gopher(support.getType(), hostname, preselector, port); Output html = new Html(support.getType(), hostname, preselector, port); new File(dir, support.getSelector()).mkdirs(); - System.err - .print("Listing recent news for " + support.getType() + "..."); - List stories = support.list(); - System.err.println(" " + stories.size() + " stories found!"); - int i = 1; for (Story story : stories) { IOUtils.writeSmallFile(dir, story.getSelector() + ".header", - gopher.export(story)); + gopher.exportHeader(story)); IOUtils.writeSmallFile(dir, story.getSelector() + ".header.html", - html.export(story)); - - System.err.println(String.format("%02d/%02d", i, stories.size()) - + " Fetching comments for story " + story.getId() + "..."); - List comments = support.getComments(story); + html.exportHeader(story)); IOUtils.writeSmallFile(dir, story.getSelector(), - gopher.export(story, comments)); + gopher.export(story)); IOUtils.writeSmallFile(dir, story.getSelector() + ".html", - html.export(story, comments)); - - i++; + html.export(story)); } + // Finding headers of all stories in cache: File varDir = new File(dir, support.getSelector()); String[] headers = varDir.list(new FilenameFilter() { @Override @@ -165,29 +175,36 @@ public class Fetcher { } }); - File cache = new File(varDir, ".cache"); - File cacheHtml = new File(varDir, "index.html"); - FileWriter writer = new FileWriter(cache); + // Finding which ones to show: + int from = 0; + int to = 0; + if (headers.length > 0) { + Arrays.sort(headers); + from = headers.length - 1; + to = headers.length - maxStories; + if (to < 0) { + to = 0; + } + } + + // Writing the cache/index files with the stories: + File gopherCache = new File(varDir, ".cache"); + FileWriter writer = new FileWriter(gopherCache); + try { + for (i = from; i >= to; i--) { + writer.append(IOUtils + .readSmallFile(new File(varDir, headers[i]))); + } + } finally { + writer.close(); + } + + File htmlIndex = new File(varDir, "index.html"); + writer = new FileWriter(htmlIndex); try { - FileWriter writerHtml = new FileWriter(cacheHtml); - try { - if (headers.length > 0) { - Arrays.sort(headers); - int from = headers.length - 1; - int to = headers.length - maxStories; - if (to < 0) { - to = 0; - } - for (i = from; i >= to; i--) { - writer.append(IOUtils.readSmallFile(new File(varDir, - headers[i]))); - - writerHtml.append(IOUtils.readSmallFile(new File( - varDir, headers[i] + ".html"))); - } - } - } finally { - writerHtml.close(); + for (i = from; i >= to; i--) { + writer.append(IOUtils.readSmallFile(new File(varDir, headers[i] + + ".html"))); } } finally { writer.close(); diff --git a/src/be/nikiroo/gofetch/data/Story.java b/src/be/nikiroo/gofetch/data/Story.java index aa5aecc..9fd4c14 100644 --- a/src/be/nikiroo/gofetch/data/Story.java +++ b/src/be/nikiroo/gofetch/data/Story.java @@ -1,6 +1,7 @@ package be.nikiroo.gofetch.data; import java.net.URL; +import java.util.List; import be.nikiroo.gofetch.support.BasicSupport; import be.nikiroo.gofetch.support.BasicSupport.Type; @@ -19,6 +20,9 @@ public class Story { private String urlExternal; private String content; + private String fullContent; + private List comments; + /** * Create a news story. * @@ -46,6 +50,9 @@ public class Story { this.urlInternal = urlInternal; this.urlExternal = urlExternal; this.content = content; + + // Defaults fullContent to content + this.fullContent = content; } public String getSelector() { @@ -93,4 +100,34 @@ public class Story { public String getContent() { return content; } + + /** + * @return the fullContent + */ + public String getFullContent() { + return fullContent; + } + + /** + * @param fullContent + * the fullContent to set + */ + public void setFullContent(String fullContent) { + this.fullContent = fullContent; + } + + /** + * @return the comments + */ + public List getComments() { + return comments; + } + + /** + * @param comments + * the comments to set + */ + public void setComments(List comments) { + this.comments = comments; + } } \ No newline at end of file diff --git a/src/be/nikiroo/gofetch/output/Gopher.java b/src/be/nikiroo/gofetch/output/Gopher.java index 1ad9731..dc5ac80 100644 --- a/src/be/nikiroo/gofetch/output/Gopher.java +++ b/src/be/nikiroo/gofetch/output/Gopher.java @@ -1,7 +1,5 @@ package be.nikiroo.gofetch.output; -import java.util.List; - import be.nikiroo.gofetch.StringJustifier; import be.nikiroo.gofetch.data.Comment; import be.nikiroo.gofetch.data.Story; @@ -37,20 +35,20 @@ public class Gopher extends Output { } @Override - public String export(Story story) { + public String exportHeader(Story story) { return append(new StringBuilder(), story, false).append("i\r\ni\r\n") .toString(); } @Override - public String export(Story story, List comments) { + public String export(Story story) { StringBuilder builder = new StringBuilder(); append(builder, story, true); builder.append("i\r\n"); - if (comments != null) { - for (Comment comment : comments) { + if (story.getComments() != null) { + for (Comment comment : story.getComments()) { append(builder, comment, ""); } } @@ -85,17 +83,22 @@ public class Gopher extends Output { } private StringBuilder append(StringBuilder builder, Story story, - boolean links) { - if (links) { + boolean resume) { + if (!resume) { appendCenter(builder, story.getTitle(), true); builder.append("i\r\n"); appendLeft(builder, story.getDetails(), " "); builder.append("i\r\n"); + builder.append("i o News link: ").append(story.getUrlInternal()) .append("\r\n"); builder.append("i o Source link: ").append(story.getUrlExternal()) .append("\r\n"); builder.append("i\r\n"); + + builder.append("i\r\n"); + + appendLeft(builder, story.getFullContent(), " "); } else { builder.append('1').append(story.getTitle()) // .append('\t').append("0").append(story.getSelector()) // @@ -103,11 +106,10 @@ public class Gopher extends Output { .append('\t').append(port) // .append("\r\n"); appendLeft(builder, story.getDetails(), " "); - } - - builder.append("i\r\n"); + builder.append("i\r\n"); - appendLeft(builder, story.getContent(), " "); + appendLeft(builder, story.getContent(), " "); + } builder.append("i\r\n"); diff --git a/src/be/nikiroo/gofetch/output/Html.java b/src/be/nikiroo/gofetch/output/Html.java index fea5f67..b0ef7e2 100644 --- a/src/be/nikiroo/gofetch/output/Html.java +++ b/src/be/nikiroo/gofetch/output/Html.java @@ -1,7 +1,5 @@ package be.nikiroo.gofetch.output; -import java.util.List; - import be.nikiroo.gofetch.data.Comment; import be.nikiroo.gofetch.data.Story; import be.nikiroo.gofetch.support.BasicSupport.Type; @@ -17,7 +15,7 @@ public class Html extends Output { if (!sel.isEmpty()) { sel = "/1" + sel; } - + String gopherUrl = "gopher://" + hostname + sel + ":" + port; return "

News

\n"// @@ -35,7 +33,7 @@ public class Html extends Output { } @Override - public String export(Story story) { + public String exportHeader(Story story) { StringBuilder builder = new StringBuilder(); builder.append("
\n"); @@ -47,15 +45,15 @@ public class Html extends Output { } @Override - public String export(Story story, List comments) { + public String export(Story story) { StringBuilder builder = new StringBuilder(); builder.append("
\n"); appendHtml(builder, story, false); builder.append("
\n"); - if (comments != null) { - for (Comment comment : comments) { + if (story.getComments() != null) { + for (Comment comment : story.getComments()) { appendHtml(builder, comment, " "); } } @@ -106,7 +104,11 @@ public class Html extends Output { } builder.append("
\n"); - builder.append(" " + story.getContent() + "\n"); + if (resume) { + builder.append(" " + story.getContent() + "\n"); + } else { + builder.append(" " + story.getFullContent() + "\n"); + } builder.append("
\n"); return builder; diff --git a/src/be/nikiroo/gofetch/output/Output.java b/src/be/nikiroo/gofetch/output/Output.java index f293b04..db6554b 100644 --- a/src/be/nikiroo/gofetch/output/Output.java +++ b/src/be/nikiroo/gofetch/output/Output.java @@ -1,8 +1,5 @@ package be.nikiroo.gofetch.output; -import java.util.List; - -import be.nikiroo.gofetch.data.Comment; import be.nikiroo.gofetch.data.Story; import be.nikiroo.gofetch.support.BasicSupport.Type; @@ -66,24 +63,22 @@ public abstract class Output { abstract public String getIndexFooter(); /** - * Export a story (in resume mode). + * Export the header of a story (a resume mode). * * @param story * the story * * @return the resume */ - abstract public String export(Story story); + abstract public String exportHeader(Story story); /** * Export a full story with comments. * * @param story * the story - * @param comments - * the comments * * @return the story */ - abstract public String export(Story story, List comments); + abstract public String export(Story story); } diff --git a/src/be/nikiroo/gofetch/support/BasicSupport.java b/src/be/nikiroo/gofetch/support/BasicSupport.java index d9e273a..7a1d0ea 100644 --- a/src/be/nikiroo/gofetch/support/BasicSupport.java +++ b/src/be/nikiroo/gofetch/support/BasicSupport.java @@ -7,7 +7,6 @@ import java.net.URLConnection; import java.util.List; import java.util.zip.GZIPInputStream; -import be.nikiroo.gofetch.data.Comment; import be.nikiroo.gofetch.data.Story; public abstract class BasicSupport { @@ -21,7 +20,17 @@ public abstract class BasicSupport { abstract public List list() throws IOException; - abstract public List getComments(Story story) throws IOException; + /** + * Fetch the full article content as well as all the comments associated to + * this {@link Story}, if any (can be empty, but not NULL). + * + * @param story + * the story to fetch the comments of + * + * @throws IOException + * in case of I/O error + */ + abstract public void fetch(Story story) throws IOException; abstract public String getDescription(); diff --git a/src/be/nikiroo/gofetch/support/LWN.java b/src/be/nikiroo/gofetch/support/LWN.java index e25bc92..08c9d5a 100644 --- a/src/be/nikiroo/gofetch/support/LWN.java +++ b/src/be/nikiroo/gofetch/support/LWN.java @@ -28,8 +28,7 @@ public class LWN extends BasicSupport { @Override public List list() throws IOException { // TODO: comments + do not get comment for [$] stories - // + update body on getComment (global change, also LinuxToday) - + List list = new ArrayList(); URL url = new URL("https://lwn.net/"); @@ -45,35 +44,33 @@ public class LWN extends BasicSupport { if (listings.size() == 0) { continue; } - + Element listing = listings.get(0); if (listing.children().size() < 2) { continue; } - String title = titles.get(0).text(); String details = listing.children().get(0).text(); String body = ""; // All but the first and two last children - for (int i = 1 ; i < listing.children().size() - 2; i++) { + for (int i = 1; i < listing.children().size() - 2; i++) { Element e = listing.children().get(i); body = body.trim() + " " + e.text().trim(); } body = body.trim(); - + String author = ""; int pos = details.indexOf(" by "); if (pos >= 0) { author = details.substring(pos + " by ".length()).trim(); } - + String date = ""; pos = details.indexOf(" Posted "); if (pos >= 0) { date = details.substring(pos + " Posted ".length()).trim(); } - String id = ""; String intUrl = ""; @@ -83,32 +80,26 @@ public class LWN extends BasicSupport { intUrl = idElem.absUrl("href"); pos = intUrl.indexOf("#Comments"); if (pos >= 0) { - intUrl = intUrl.substring(0, pos -1); + intUrl = intUrl.substring(0, pos - 1); } id = intUrl.replaceAll("[^0-9]", ""); } - list.add(new Story(getType(), id, title, details, intUrl, extUrl, body)); + list.add(new Story(getType(), id, title, details, intUrl, extUrl, + body)); } return list; } @Override - public List getComments(Story story) throws IOException { - List comments = new ArrayList(); - + public void fetch(Story story) throws IOException { /* - URL url = new URL(story.getUrlInternal()); - InputStream in = open(url); - Document doc = DataUtil.load(in, "UTF-8", url.toString()); - Elements listing = doc.getElementsByTag("main"); - if (listing.size() > 0) { - comments.addAll(getComments(listing.get(0))); - } - */ - - return comments; + * URL url = new URL(story.getUrlInternal()); InputStream in = + * open(url); Document doc = DataUtil.load(in, "UTF-8", url.toString()); + * Elements listing = doc.getElementsByTag("main"); if (listing.size() > + * 0) { comments.addAll(getComments(listing.get(0))); } + */ } private List getComments(Element listing) { diff --git a/src/be/nikiroo/gofetch/support/Pipedot.java b/src/be/nikiroo/gofetch/support/Pipedot.java index 4d68fe7..2436540 100644 --- a/src/be/nikiroo/gofetch/support/Pipedot.java +++ b/src/be/nikiroo/gofetch/support/Pipedot.java @@ -89,7 +89,7 @@ public class Pipedot extends BasicSupport { } @Override - public List getComments(Story story) throws IOException { + public void fetch(Story story) throws IOException { List comments = new ArrayList(); URL url = new URL(story.getUrlInternal()); @@ -100,7 +100,7 @@ public class Pipedot extends BasicSupport { comments.addAll(getComments(listing.get(0))); } - return comments; + story.setComments(comments); } private List getComments(Element listing) { diff --git a/src/be/nikiroo/gofetch/support/Slashdot.java b/src/be/nikiroo/gofetch/support/Slashdot.java index 5b5612f..6a53954 100644 --- a/src/be/nikiroo/gofetch/support/Slashdot.java +++ b/src/be/nikiroo/gofetch/support/Slashdot.java @@ -76,7 +76,7 @@ public class Slashdot extends BasicSupport { } @Override - public List getComments(Story story) throws IOException { + public void fetch(Story story) throws IOException { List comments = new ArrayList(); URL url = new URL(story.getUrlInternal()); @@ -87,7 +87,7 @@ public class Slashdot extends BasicSupport { comments.addAll(getComments(listing)); } - return comments; + story.setComments(comments); } private List getComments(Element listing) { -- 2.27.0