Fix layout issues in getContent() text

[gofetch.git] / src / be / nikiroo / gofetch / support / BasicSupport.java
diff --git a/src/be/nikiroo/gofetch/support/BasicSupport.java b/src/be/nikiroo/gofetch/support/BasicSupport.java

index dcd5e6ea295ad11f18372035d7b9033b1b7c5ba0..42761c9a82d1a7cd39bcd3bc96dd46e862f608a2 100644 (file)
--- a/src/be/nikiroo/gofetch/support/BasicSupport.java
+++ b/src/be/nikiroo/gofetch/support/BasicSupport.java
@@ -300,21 +300,7 @@ public abstract class BasicSupport {
                         Document doc = DataUtil.load(in, "UTF-8", url.toString());
                         Element article = getFullArticle(doc);
                         if (article != null) {
-                               StringBuilder builder = new StringBuilder();
-                               ElementProcessor eProc = getElementProcessorFullArticle();
-                               if (eProc != null) {
-                                       for (String line : toLines(article, eProc)) {
-                                               builder.append(line + "\n");
-                                       }
-                               } else {
-                                       builder.append(article.text());
-                               }
-
-                               // Content is too tight with a single break per line:
-                               fullContent = builder.toString().replace("\n", "\n\n") //
-                                               .replace("\n\n\n\n", "\n\n") //
-                                               .replace("\n\n\n\n", "\n\n") //
-                                               .trim();
+                               fullContent = getArticleText(article);
                         }
  
                         if (fullContent.isEmpty()) {
@@ -331,6 +317,33 @@ public abstract class BasicSupport {
                 }
         }
  
+       /**
+        * Return the text from this {@link Element}: the lines produced with the
+        * {@link BasicSupport#getElementProcessorFullArticle()} processor when there
+        * is one, {@link Element#text()} otherwise; line breaks are then doubled.
+        * 
+        * @param article
+        *            the element to extract the text from
+        * 
+        * @return the trimmed text (4 line breaks in a row are reduced to 2, twice)
+        */
+       protected String getArticleText(Element article) {
+               StringBuilder builder = new StringBuilder();
+               ElementProcessor eProc = getElementProcessorFullArticle();
+               if (eProc != null) {
+                       for (String line : toLines(article, eProc)) {
+                               builder.append(line + "\n");
+                       }
+               } else {
+                       builder.append(article.text());
+               }
+
+               // Too tight with single breaks: double them, then fold 4 breaks into 2:
+               return builder.toString().replace("\n", "\n\n") //
+                               .replace("\n\n\n\n", "\n\n") //
+                               .replace("\n\n\n\n", "\n\n") //
+                               .trim();
+       }
+
         /**
          * Return the full article if available (this is the article to retrieve
          * from the newly downloaded page at {@link Story#getUrlInternal()}).