private Type type;
private String id;
private String title;
+ private String author;
+ private String date;
+ private String category;
private String details;
private String urlInternal;
private String urlExternal;
* the news ID
* @param title
* the news title
+ * @param author
+ * the author name for the details
+ * @param date
+ * the post date for the details
+ * @param category
+ * the category for the details
* @param details
- * some details to add to the title
+ * some details to add to the title (author, date and category
+ * will be added in the getter if available)
* @param urlInternal
* the {@link URL} to get this news on the associated news site
* @param urlExternal
* @param content
* the story content
*/
- public Story(Type type, String id, String title, String details,
- String urlInternal, String urlExternal, String content) {
+ public Story(Type type, String id, String title, String author,
+ String date, String category, String details, String urlInternal,
+ String urlExternal, String content) {
this.type = type;
this.id = id;
this.title = title;
+ this.author = author;
+ this.date = date;
+ this.category = category;
this.details = details;
this.urlInternal = urlInternal;
this.urlExternal = urlExternal;
* @return the details
*/
public String getDetails() {
+ // NOTE: this local deliberately shadows the field of the same name; the
+ // field itself is read below through "this.details".
+ String details = "";
+
+ // Header line: "[category] date (author)", each part being optional
+ if (category != null && !category.trim().isEmpty()) {
+ details += "[" + category + "] ";
+ }
+ if (date != null && !date.trim().isEmpty()) {
+ details += date + " ";
+ }
+ if (author != null && !author.trim().isEmpty()) {
+ details += "(" + author + ") ";
+ }
+
+ // Drop the separator left behind by the last header part
+ details = details.trim();
+
+ if (this.details != null && !this.details.trim().isEmpty()) {
+ // Only start a new line when there is a header line above it, so a
+ // story without author/date/category does not begin with a blank line
+ if (!details.isEmpty()) {
+ details += "\n";
+ }
+ details += this.details;
+ }
+
return details;
}
package be.nikiroo.gofetch.support;
import java.io.IOException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
import java.util.ArrayList;
+import java.util.Date;
import java.util.List;
import org.jsoup.helper.StringUtil;
import be.nikiroo.gofetch.data.Story;
import be.nikiroo.utils.Downloader;
+/**
+ * Base class for website support.
+ *
+ * @author niki
+ */
public abstract class BasicSupport {
+ /** The downloader to use for all websites. */
protected static Downloader downloader = new Downloader("gofetcher");
+ /**
+ * The support type (each website we support has a single type).
+ *
+ * @author niki
+ */
public enum Type {
- SLASHDOT, PIPEDOT, LWN, LEMONDE, REGISTER, TOOLINUX,
+ /** EN: Any, but mostly IT/Sci */
+ SLASHDOT,
+ /** EN: Clone of Slashdot, mostly abandoned */
+ PIPEDOT,
+ /** EN: Linux */
+ LWN,
+ /** FR: Any */
+ LEMONDE,
+ /** EN: IT */
+ REGISTER,
+ /** FR: Linux */
+ TOO_LINUX,
}
/**
*
* @param text
* the text to process
- * @return
+ *
+ * @return the resulting text
*/
public String processText(String text);
*/
abstract public void fetch(Story story) throws IOException;
+ /**
+ * The website textual description, to add in the dispatcher page.
+ * <p>
+ * Should be short.
+ *
+ * @return the description
+ */
abstract public String getDescription();
+ /**
+ * The gopher "selector" to use for output.
+ * <p>
+ * A kind of "URL path", like "/news/" or "/misc/news/" or...
+ *
+ * @return the selector
+ */
public String getSelector() {
return getSelector(type);
}
+ /**
+ * The support type.
+ *
+ * @return the type
+ */
public Type getType() {
return type;
}
+ /**
+ * The support type.
+ *
+ * @param type
+ * the new type
+ */
protected void setType(Type type) {
this.type = type;
}
/**
+ * The {@link String} to prepend to the selector (the selector will be
+ * constructed as "this string" then "/type/").
+ *
* @param preselector
* the preselector to set
*/
case REGISTER:
support = new TheRegister();
break;
- case TOOLINUX:
+ case TOO_LINUX:
support = new TooLinux();
break;
}
return support;
}
+ /**
+ * The gopher "selector" to use for output for this type, using the
+ * preselector.
+ * <p>
+ * A kind of "URL path", like "/news/" or "/misc/news/" or...
+ *
+ * @param type
+ * the type to get the selector of
+ *
+ * @return the selector
+ */
static public String getSelector(Type type) {
return preselector + "/" + type + "/";
}
final StringBuilder currentLine = new StringBuilder();
final List<Integer> quoted = new ArrayList<Integer>();
final List<Node> ignoredNodes = new ArrayList<Node>();
- final List<String> footnotes = new ArrayList<String>();
if (element != null) {
new NodeTraversor(new NodeVisitor() {
if (block && currentLine.length() > 0) {
currentLine.append("\n");
}
-
- if (!element.absUrl("href").trim().isEmpty()) {
- footnotes.add(element.absUrl("href"));
- currentLine.append("[" + footnotes.size() + "]");
- }
} else if (node instanceof TextNode) {
TextNode textNode = (TextNode) node;
String line = StringUtil.normaliseWhitespace(textNode
lines.set(i, lines.get(i).replace(" ", " ").trim());
}
- if (footnotes.size() > 0) {
- lines.add("");
- lines.add("");
- lines.add("");
- lines.add("");
- for (int i = 0; i < footnotes.size(); i++) {
- lines.add("[" + (i + 1) + "] " + footnotes.get(i));
- }
+ return lines;
+ }
+
+ /**
+ * Reformat the date if possible.
+ * <p>
+ * Two input formats are recognised: a positive number of seconds since the
+ * epoch, or a timestamp matching "yyyy-MM-dd'T'HH:mm:ssXXX" (for instance
+ * "2018-01-31T10:15:30+01:00"). Both are rendered as "yyyy/MM/dd".
+ *
+ * @param date
+ * the input date (can be NULL)
+ *
+ * @return the reformatted date, or the same value if it was not parsable
+ * (NULL stays NULL)
+ */
+ static protected String date(String date) {
+ // Nothing to parse: hand the value back untouched, as documented
+ if (date == null) {
+ return date;
+ }
+
+ SimpleDateFormat out = new SimpleDateFormat("yyyy/MM/dd");
+
+ long epoch = 0;
+ try {
+ // Trimmed like the textual format below, so both accept padding
+ epoch = Long.parseLong(date.trim());
+ } catch (NumberFormatException e) {
+ // Not a number: fall through to the textual format
+ epoch = 0;
}
- return lines;
+ if (epoch > 0) {
+ // The input is in seconds, Date expects milliseconds
+ return out.format(new Date(1000 * epoch));
+ }
+
+ try {
+ Date dat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssXXX")
+ .parse(date.trim());
+ return out.format(dat);
+ } catch (ParseException e) {
+ // Unknown format: keep the original text
+ return date;
+ }
}
}
}
body = body.trim();
+ int pos;
+
+ // Category: the text between the first "[" and the "]" closing it
+ // (both brackets excluded)
+ String categ = "";
+ int categStart = details.indexOf("[");
+ pos = details.indexOf("]", categStart + 1);
+ if (categStart >= 0 && pos > categStart) {
+ categ = details.substring(categStart + 1, pos).trim();
+ }
+
String author = "";
- int pos = details.indexOf(" by ");
+ pos = details.indexOf(" by ");
if (pos >= 0) {
author = details.substring(pos + " by ".length()).trim();
}
pos = details.indexOf(" Posted ");
if (pos >= 0) {
date = details.substring(pos + " Posted ".length()).trim();
+ // The date may be followed by " by AUTHOR": cut it there, and leave
+ // the author extracted above untouched
+ pos = date.indexOf(" by ");
+ if (pos >= 0) {
+ date = date.substring(0, pos).trim();
+ }
}
+ // We extracted everything from details so...
+ details = "";
+
String id = "";
String intUrl = "";
String extUrl = "";
id = intUrl.replaceAll("[^0-9]", "");
}
- list.add(new Story(getType(), id, title, details, intUrl, extUrl,
- body));
+ list.add(new Story(getType(), id, title, author, date, categ,
+ details, intUrl, extUrl, body));
}
return list;
import be.nikiroo.gofetch.data.Comment;
import be.nikiroo.gofetch.data.Story;
+/**
+ * Support <a href="http://www.lemonde.fr/">http://www.lemonde.fr/</a>.
+ *
+ * @author niki
+ */
public class LeMonde extends BasicSupport {
@Override
public String getDescription() {
&& contentElements.size() > 0) {
String id = times.get(0).attr("datetime").replace(":", "_")
.replace("+", "_");
- String title = "[" + topic + "] "
- + titleElements.get(0).text();
+ String title = titleElements.get(0).text();
+ // The post date comes from the "datetime" attribute (also used for the
+ // id above), not from the title text
+ String date = date(times.get(0).attr("datetime"));
String content = contentElements.get(0).text();
String intUrl = "";
String extUrl = "";
+ String author = "";
String details = "";
Elements detailsElements = article
.getElementsByClass("signature");
if (detailsElements.size() > 0) {
- details = detailsElements.get(0).text();
+ author = detailsElements.get(0).text();
}
Elements links = titleElements.get(0).getElementsByTag("a");
if (links.size() > 0) {
intUrl = links.get(0).absUrl("href");
- list.add(new Story(getType(), id, title, details,
- intUrl, extUrl, content));
+ list.add(new Story(getType(), id, title, author, date,
+ topic, details, intUrl, extUrl, content));
}
}
}
}
}
- list.add(new Story(getType(), id, title.text(), details, intUrl,
- extUrl, body));
+ list.add(new Story(getType(), id, title.text(), "", "", "",
+ details, intUrl, extUrl, body));
}
return list;
import be.nikiroo.gofetch.data.Comment;
import be.nikiroo.gofetch.data.Story;
+import be.nikiroo.utils.StringUtils;
/**
* Support <a href='https://slashdot.org/'>https://slashdot.org/</a>.
details = detailsElements.get(0).text();
}
+ // details:
+ // "Posted by AUTHOR on DATE from the further-crackdown dept."
+ String author = "";
+ int pos = details.indexOf(" on ");
+ if (details.startsWith("Posted by ") && pos >= 0) {
+ author = details.substring("Posted by ".length(), pos).trim();
+ }
+ pos = details.indexOf(" from the ");
+ if (pos >= 0) {
+ details = details.substring(pos).trim();
+ }
+
String body = "";
Element bodyElement = doc.getElementById("text-" + id);
if (bodyElement != null) {
body = bodyElement.text();
}
- list.add(new Story(getType(), id, title.text(), details, intUrl,
- extUrl, body));
+ String categ = "";
+ Element categElement = doc.getElementsByClass("topic").first();
+ if (categElement != null) {
+ categ = StringUtils.unhtml(categElement.text()).trim();
+ }
+
+ String date = "";
+ Element dateElement = doc.getElementsByTag("time").first();
+ if (dateElement != null) {
+ date = StringUtils.unhtml(dateElement.text()).trim();
+ }
+
+ list.add(new Story(getType(), id, title.text(), author, date,
+ categ, details, intUrl, extUrl, body));
}
return list;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
-import java.text.SimpleDateFormat;
import java.util.ArrayList;
-import java.util.Date;
import java.util.List;
import org.jsoup.helper.DataUtil;
import be.nikiroo.gofetch.data.Story;
import be.nikiroo.utils.StringUtils;
+/**
+ * Support <a
+ * href="https://www.theregister.co.uk/">https://www.theregister.co.uk/</a>.
+ *
+ * @author niki
+ */
public class TheRegister extends BasicSupport {
@Override
public String getDescription() {
String date = "";
String details = "";
String body = "";
+ String categ = "";
+ String author = ""; // nope
- String topic = "";
- Element topicElement = article.previousElementSibling();
- if (topicElement != null) {
- topic = "[" + topicElement.text().trim() + "] ";
+ Element categElement = article.previousElementSibling();
+ if (categElement != null) {
+ categ = categElement.text().trim();
}
+
Element titleElement = article.getElementsByTag("h4").first();
if (titleElement != null) {
title = StringUtils.unhtml(titleElement.text()).trim();
}
- title = topic + title;
Element dateElement = article.getElementsByClass("time_stamp")
.first();
details += StringUtils.unhtml(detailsElement.text()).trim();
}
- list.add(new Story(getType(), id, title, details, intUrl, extUrl,
- body));
+ list.add(new Story(getType(), id, title, author, date, categ,
+ details, intUrl, extUrl, body));
}
return list;
}
}
}
-
- // Return display date from epoch String, or "" if error
- private static String date(String epochString) {
- long epoch = 0;
- try {
- epoch = Long.parseLong(epochString);
- } catch (Exception e) {
- epoch = 0;
- }
-
- if (epoch > 0) {
- return new SimpleDateFormat("dd MMM YYYY").format(new Date(
- 1000 * epoch));
- }
-
- return "";
- }
}
import be.nikiroo.gofetch.data.Story;
import be.nikiroo.utils.StringUtils;
+/**
+ * Support <a href="https://www.toolinux.com/">https://www.toolinux.com/</a>.
+ *
+ * @author niki
+ */
public class TooLinux extends BasicSupport {
@Override
public String getDescription() {
- return "TooLinux: Premier quotidien francophone d'actualité généraliste sur Linux, les logiciels libres et l'interopérabilité, depuis mars 2000.";
+ return "TooLinux: Actualité généraliste sur Linux et les logiciels libres";
}
@Override
String date = "";
String details = "";
String body = "";
+ String author = ""; // nope
+ String categ = ""; // nope
Element urlElement = article.getElementsByTag("a").first();
if (urlElement != null) {
Element detailsElement = article.getElementsByClass("introduction")
.first();
- details = "(" + date + ") ";
if (detailsElement != null) {
details += StringUtils.unhtml(detailsElement.text()).trim();
}
- list.add(new Story(getType(), id, title, details, intUrl, extUrl,
- body));
+ list.add(new Story(getType(), id, title, author, date, categ,
+ details, intUrl, extUrl, body));
}
return list;