Reddit: fix problem with new kind of HTML (wip)
[gofetch.git] / src / be / nikiroo / gofetch / support / BasicSupport.java
index dcd5e6ea295ad11f18372035d7b9033b1b7c5ba0..17a3c151750e9c7e56169fe4fd8be87b9eb39855 100644 (file)
@@ -133,9 +133,9 @@ public abstract class BasicSupport {
                                                id = "0" + id;
                                        }
                                } else {
-                                       id = date.replace(":", "_").replace("+", "_");
+                                       id = date.replace(":", "_").replace("+", "_").replace("/", "-");
                                }
-
+                               
                                date = date(date);
 
                                list.add(new Story(getType(), id, title, author, date, categ,
@@ -300,21 +300,7 @@ public abstract class BasicSupport {
                        Document doc = DataUtil.load(in, "UTF-8", url.toString());
                        Element article = getFullArticle(doc);
                        if (article != null) {
-                               StringBuilder builder = new StringBuilder();
-                               ElementProcessor eProc = getElementProcessorFullArticle();
-                               if (eProc != null) {
-                                       for (String line : toLines(article, eProc)) {
-                                               builder.append(line + "\n");
-                                       }
-                               } else {
-                                       builder.append(article.text());
-                               }
-
-                               // Content is too tight with a single break per line:
-                               fullContent = builder.toString().replace("\n", "\n\n") //
-                                               .replace("\n\n\n\n", "\n\n") //
-                                               .replace("\n\n\n\n", "\n\n") //
-                                               .trim();
+                               fullContent = getArticleText(article);
                        }
 
                        if (fullContent.isEmpty()) {
@@ -331,6 +317,33 @@ public abstract class BasicSupport {
                }
        }
 
+       /**
+        * Return the text from this {@link Element}: the lines produced with the
+        * {@link BasicSupport#getElementProcessorFullArticle()} processor if any, else {@link Element#text()}.
+        * 
+        * @param article
+        *            the element to extract the text from
+        * 
+        * @return the text, with every line break doubled, then trimmed
+        */
+       protected String getArticleText(Element article) {
+               StringBuilder builder = new StringBuilder();
+               ElementProcessor eProc = getElementProcessorFullArticle();
+               if (eProc != null) {
+                       for (String line : toLines(article, eProc)) {
+                               builder.append(line + "\n");
+                       }
+               } else {
+                       builder.append(article.text());
+               }
+
+               // Content is too tight with a single break per line, so space it out:
+               return builder.toString().replace("\n", "\n\n") // double every break
+                               .replace("\n\n\n\n", "\n\n") // collapse 4 breaks into 2
+                               .replace("\n\n\n\n", "\n\n") // second pass for longer runs
+                               .trim();
+       }
+
        /**
         * Return the full article if available (this is the article to retrieve
         * from the newly downloaded page at {@link Story#getUrlInternal()}).
@@ -588,6 +601,9 @@ public abstract class BasicSupport {
                        case SEPT_SUR_SEPT:
                                support = new SeptSurSept();
                                break;
+                       case REDDIT:
+                               support = new Reddit();
+                               break;
                        }
 
                        if (support != null) {
@@ -806,7 +822,7 @@ public abstract class BasicSupport {
                        Date dat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssXXX")
                                        .parse(date.trim());
                        return out.format(dat);
-               } catch (ParseException e) {
+               } catch (Exception e) {
                        return date;
                }
        }