Elements contentElements = article.getElementsByClass("txt3");
if (times.size() > 0 && titleElements.size() > 0
&& contentElements.size() > 0) {
- String id = times.get(0).attr("datetime").replace(":", "_");
+ String id = times.get(0).attr("datetime").replace(":", "_")
+ .replace("+", "_");
String title = "[" + topic + "] "
+ titleElements.get(0).text();
String content = contentElements.get(0).text();
Document doc = DataUtil.load(in, "UTF-8", url.toString());
Element article = doc.getElementById("articleBody");
if (article != null) {
- for (String line : toLines(article, new QuoteProcessor() {
- @Override
- public String processText(String text) {
- return text;
- }
-
+ for (String line : toLines(article, new BasicElementProcessor() {
@Override
public boolean ignoreNode(Node node) {
if (node instanceof Element) {
return false;
}
- @Override
- public boolean detectQuote(Node node) {
- return false;
- }
-
@Override
public String manualProcessing(Node node) {
if (node instanceof Element) {