- if (isHtml()) {
- // Special <HR> processing:
- content = content.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
- "<br/>* * *<br/>");
- }
-
- List<Paragraph> paras = new ArrayList<Paragraph>();
-
- if (content != null && !content.trim().isEmpty()) {
- if (isHtml()) {
- String[] tab = content.split("(<p>|</p>|<br>|<br/>)");
- pg.setMinMax(0, tab.length);
- int i = 1;
- for (String line : tab) {
- if (line.startsWith("[") && line.endsWith("]")) {
- pg.setName("Extracting image " + i);
- }
- paras.add(makeParagraph(source, line.trim()));
- pg.setProgress(i++);
- }
- pg.setName(null);
- } else {
- List<String> lines = new ArrayList<String>();
- BufferedReader buff = null;
- try {
- buff = new BufferedReader(
- new InputStreamReader(new ByteArrayInputStream(
- content.getBytes("UTF-8")), "UTF-8"));
- for (String line = buff.readLine(); line != null; line = buff
- .readLine()) {
- lines.add(line.trim());
- }
- } finally {
- if (buff != null) {
- buff.close();
- }
- }
-
- pg.setMinMax(0, lines.size());
- int i = 0;
- for (String line : lines) {
- if (line.startsWith("[") && line.endsWith("]")) {
- pg.setName("Extracting image " + i);
- }
- paras.add(makeParagraph(source, line));
- pg.setProgress(i++);
- }
- pg.setName(null);
- }
-
- // Check quotes for "bad" format
- List<Paragraph> newParas = new ArrayList<Paragraph>();
- for (Paragraph para : paras) {
- newParas.addAll(requotify(para));
- }
- paras = newParas;
-
- // Remove double blanks/brks
- fixBlanksBreaks(paras);
- }
-
- return paras;
- }
-
- /**
-  * Build one {@link Paragraph} out of a single line of story text.
-  * <p>
-  * A line that both starts with <tt>[</tt> and ends with <tt>]</tt> is
-  * treated as a candidate image reference: the text between the brackets is
-  * trimmed and handed to <tt>getImageUrl</tt>. If that lookup yields a
-  * non-<tt>null</tt> URL, an image {@link Paragraph} is returned. In every
-  * other case (no brackets, or no URL found) the untouched line, brackets
-  * included, is passed to <tt>processPara</tt>.
-  *
-  * @param source
-  *            the source URL of the story, forwarded to the image lookup
-  * @param line
-  *            the textual content of the paragraph (must not be
-  *            <tt>null</tt>)
-  *
-  * @return the {@link Paragraph}
-  */
- private Paragraph makeParagraph(URL source, String line) {
-     boolean bracketed = line.startsWith("[") && line.endsWith("]");
-     if (bracketed) {
-         // Strip the surrounding brackets before resolving the reference
-         String reference = line.substring(1, line.length() - 1).trim();
-         URL resolved = getImageUrl(this, source, reference);
-         if (resolved != null) {
-             return new Paragraph(resolved);
-         }
-     }
-     // Not an image (or the image could not be resolved): plain text
-     return processPara(line);
- }
-
- /**
- * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
- * those {@link Paragraph}s.
- * <p>
- * The resulting list will not contain a starting or trailing blank/break
- * nor 2 blanks or breaks following each other.
- *
- * @param paras
- * the list of {@link Paragraph}s to fix
- */
- protected void fixBlanksBreaks(List<Paragraph> paras) {
- boolean space = false;
- boolean brk = true;
- for (int i = 0; i < paras.size(); i++) {
- Paragraph para = paras.get(i);
- boolean thisSpace = para.getType() == ParagraphType.BLANK;
- boolean thisBrk = para.getType() == ParagraphType.BREAK;
-
- if (i > 0 && space && thisBrk) {
- paras.remove(i - 1);
- i--;
- } else if ((space || brk) && (thisSpace || thisBrk)) {
- paras.remove(i);
- i--;
- }