1 package be
.nikiroo
.fanfix
.supported
;
3 import java
.io
.BufferedReader
;
4 import java
.io
.ByteArrayInputStream
;
5 import java
.io
.IOException
;
6 import java
.io
.InputStreamReader
;
8 import java
.util
.ArrayList
;
11 import be
.nikiroo
.fanfix
.Instance
;
12 import be
.nikiroo
.fanfix
.bundles
.Config
;
13 import be
.nikiroo
.fanfix
.bundles
.StringId
;
14 import be
.nikiroo
.fanfix
.data
.Chapter
;
15 import be
.nikiroo
.fanfix
.data
.Paragraph
;
16 import be
.nikiroo
.fanfix
.data
.Paragraph
.ParagraphType
;
17 import be
.nikiroo
.utils
.Image
;
18 import be
.nikiroo
.utils
.Progress
;
19 import be
.nikiroo
.utils
.StringUtils
;
22 * Helper class for {@link BasicSupport}, mostly dedicated to {@link Paragraph}
23 * and text formatting for the {@link BasicSupport} class.
27 public class BasicSupportPara
{
29 private static char openQuote
= Instance
.getInstance().getTrans().getCharacter(StringId
.OPEN_SINGLE_QUOTE
);
30 private static char closeQuote
= Instance
.getInstance().getTrans().getCharacter(StringId
.CLOSE_SINGLE_QUOTE
);
31 private static char openDoubleQuote
= Instance
.getInstance().getTrans().getCharacter(StringId
.OPEN_DOUBLE_QUOTE
);
32 private static char closeDoubleQuote
= Instance
.getInstance().getTrans().getCharacter(StringId
.CLOSE_DOUBLE_QUOTE
);
34 // used by this class:
35 BasicSupportHelper bsHelper
;
36 BasicSupportImages bsImages
;
39 * Create a new {@link BasicSupportPara}.
41 * Note that you need an instance of both {@link BasicSupportHelper} and
42 * {@link BasicSupportImages} for it to work.
45 * the required {@link BasicSupportHelper}
47 * the required {@link BasicSupportImages}
49 public BasicSupportPara(BasicSupportHelper bsHelper
, BasicSupportImages bsImages
) {
50 this.bsHelper
= bsHelper
;
51 this.bsImages
= bsImages
;
55 * Create a {@link Chapter} object from the given information, formatting
56 * the content as it should be.
59 * the linked {@link BasicSupport} (can be NULL)
61 * the source of the story (for image lookup in the same path if
62 * the source is a file, can be NULL)
70 * the optional progress reporter
72 * TRUE if the input content is in HTML mode
74 * @return the {@link Chapter}, never NULL
77 * in case of I/O error
79 public Chapter
makeChapter(BasicSupport support
, URL source
,
80 int number
, String name
, String content
, boolean html
, Progress pg
)
82 // Chapter name: process it correctly, then remove the possible
83 // redundant "Chapter x: " in front of it, or "-" (as in
84 // "Chapter 5: - Fun!" after the ": " was automatically added)
85 String chapterName
= processPara(name
, false)
87 for (String lang
: Instance
.getInstance().getConfig().getList(Config
.CONF_CHAPTER
)) {
88 String chapterWord
= Instance
.getInstance().getConfig().getStringX(Config
.CONF_CHAPTER
, lang
);
89 if (chapterName
.startsWith(chapterWord
)) {
90 chapterName
= chapterName
.substring(chapterWord
.length())
96 if (chapterName
.startsWith(Integer
.toString(number
))) {
97 chapterName
= chapterName
.substring(
98 Integer
.toString(number
).length()).trim();
101 while (chapterName
.startsWith(":") || chapterName
.startsWith("-")) {
102 chapterName
= chapterName
.substring(1).trim();
106 Chapter chap
= new Chapter(number
, chapterName
);
108 if (content
!= null) {
109 List
<Paragraph
> paras
= makeParagraphs(support
, source
, content
,
112 for (Paragraph para
: paras
) {
113 words
+= para
.getWords();
115 chap
.setParagraphs(paras
);
116 chap
.setWords(words
);
123 * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
124 * and requotify them (i.e., separate them into QUOTE paragraphs and other
125 * paragraphs (quotes or not)).
128 * the paragraph to requotify (not necessarily a quote)
130 * TRUE if the input content is in HTML mode
132 * @return the correctly (or so we hope) quotified paragraphs
134 protected List
<Paragraph
> requotify(Paragraph para
, boolean html
) {
135 List
<Paragraph
> newParas
= new ArrayList
<Paragraph
>();
137 if (para
.getType() == ParagraphType
.QUOTE
138 && para
.getContent().length() > 2) {
139 String line
= para
.getContent();
140 boolean singleQ
= line
.startsWith("" + openQuote
);
141 boolean doubleQ
= line
.startsWith("" + openDoubleQuote
);
143 // Do not try when more than one quote at a time
144 // (some stories are not easily readable if we do)
146 && line
.indexOf(closeQuote
, 1) < line
147 .lastIndexOf(closeQuote
)) {
152 && line
.indexOf(closeDoubleQuote
, 1) < line
153 .lastIndexOf(closeDoubleQuote
)) {
159 if (!singleQ
&& !doubleQ
) {
160 line
= openDoubleQuote
+ line
+ closeDoubleQuote
;
161 newParas
.add(new Paragraph(ParagraphType
.QUOTE
, line
, para
164 char open
= singleQ ? openQuote
: openDoubleQuote
;
165 char close
= singleQ ? closeQuote
: closeDoubleQuote
;
168 boolean inQuote
= false;
170 for (char car
: line
.toCharArray()) {
173 } else if (car
== close
) {
175 } else if (car
== '.' && !inQuote
) {
183 String rest
= line
.substring(posDot
+ 1).trim();
184 line
= line
.substring(0, posDot
+ 1).trim();
186 for (char car
: line
.toCharArray()) {
191 newParas
.add(new Paragraph(ParagraphType
.QUOTE
, line
, words
));
192 if (!rest
.isEmpty()) {
193 newParas
.addAll(requotify(processPara(rest
, html
), html
));
207 * Process a {@link Paragraph} from a raw line of text.
209 * Will also fix quotes and HTML encoding if needed.
214 * TRUE if the input content is in HTML mode
216 * @return the processed {@link Paragraph}, never NULL
218 protected Paragraph
processPara(String line
, boolean html
) {
220 line
= StringUtils
.unhtml(line
).trim();
222 boolean space
= true;
224 boolean quote
= false;
225 boolean tentativeCloseQuote
= false;
230 StringBuilder builder
= new StringBuilder();
231 for (char car
: line
.toCharArray()) {
234 // dash, ndash and mdash: - – —
235 // currently: a dash count of 1 is kept as '-', anything else becomes an mdash
236 builder
.append(dashCount
== 1 ?
'-' : '—');
241 if (tentativeCloseQuote
) {
242 tentativeCloseQuote
= false;
243 if (Character
.isLetterOrDigit(car
)) {
246 // handle double-single quotes as double quotes
248 builder
.append(closeDoubleQuote
);
252 builder
.append(closeQuote
);
257 case ' ': // note: unbreakable space
260 case '\n': // just in case
261 case '\r': // just in case
262 if (builder
.length() > 0
263 && builder
.charAt(builder
.length() - 1) != ' ') {
270 if (space
|| (brk
&& quote
)) {
272 // handle double-single quotes as double quotes
274 builder
.deleteCharAt(builder
.length() - 1);
275 builder
.append(openDoubleQuote
);
277 builder
.append(openQuote
);
279 } else if (prev
== ' ' || prev
== car
) {
280 // handle double-single quotes as double quotes
282 builder
.deleteCharAt(builder
.length() - 1);
283 builder
.append(openDoubleQuote
);
285 builder
.append(openQuote
);
288 // it is a quote ("I'm off") or a 'quote' ("This
289 // 'good' restaurant"...)
290 tentativeCloseQuote
= true;
295 if (space
|| (brk
&& quote
)) {
297 builder
.append(openDoubleQuote
);
298 } else if (prev
== ' ') {
299 builder
.append(openDoubleQuote
);
301 builder
.append(closeDoubleQuote
);
335 if (space
|| (brk
&& quote
)) {
337 builder
.append(openQuote
);
339 // handle double-single quotes as double quotes
341 builder
.deleteCharAt(builder
.length() - 1);
342 builder
.append(openDoubleQuote
);
344 builder
.append(openQuote
);
358 // handle double-single quotes as double quotes
360 builder
.deleteCharAt(builder
.length() - 1);
361 builder
.append(closeDoubleQuote
);
363 builder
.append(closeQuote
);
372 if (space
|| (brk
&& quote
)) {
374 builder
.append(openDoubleQuote
);
376 builder
.append(openDoubleQuote
);
389 builder
.append(closeDoubleQuote
);
402 if (tentativeCloseQuote
) {
403 tentativeCloseQuote
= false;
404 builder
.append(closeQuote
);
407 line
= builder
.toString().trim();
409 ParagraphType type
= ParagraphType
.NORMAL
;
411 type
= ParagraphType
.BLANK
;
413 type
= ParagraphType
.BREAK
;
415 type
= ParagraphType
.QUOTE
;
418 return new Paragraph(type
, line
, words
);
422 * Convert the given content into {@link Paragraph}s.
425 * the linked {@link BasicSupport} (can be NULL), used to
426 * download optional image content in []
428 * the source URL of the story (for image lookup in the same path
429 * if the source is a file, can be NULL)
431 * the textual content
433 * TRUE if the input content is in HTML mode
435 * the optional progress reporter
437 * @return the {@link Paragraph}s (can be empty but never NULL)
439 * @throws IOException
440 * in case of I/O error
442 protected List
<Paragraph
> makeParagraphs(BasicSupport support
,
443 URL source
, String content
, boolean html
, Progress pg
)
450 // Special <HR> processing:
451 content
= content
.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
455 List
<Paragraph
> paras
= new ArrayList
<Paragraph
>();
457 if (content
!= null && !content
.trim().isEmpty()) {
459 String
[] tab
= content
.split("(<p>|</p>|<br>|<br/>)");
460 pg
.setMinMax(0, tab
.length
);
462 for (String line
: tab
) {
463 if (line
.startsWith("[") && line
.endsWith("]")) {
464 pg
.setName("Extracting image " + i
);
466 paras
.add(makeParagraph(support
, source
, line
.trim(), html
));
470 List
<String
> lines
= new ArrayList
<String
>();
471 BufferedReader buff
= null;
473 buff
= new BufferedReader(
474 new InputStreamReader(new ByteArrayInputStream(
475 content
.getBytes("UTF-8")), "UTF-8"));
476 for (String line
= buff
.readLine(); line
!= null; line
= buff
478 lines
.add(line
.trim());
486 pg
.setMinMax(0, lines
.size());
488 for (String line
: lines
) {
489 if (line
.startsWith("[") && line
.endsWith("]")) {
490 pg
.setName("Extracting image " + i
);
492 paras
.add(makeParagraph(support
, source
, line
, html
));
500 // Check quotes for "bad" format
501 List
<Paragraph
> newParas
= new ArrayList
<Paragraph
>();
502 for (Paragraph para
: paras
) {
503 newParas
.addAll(requotify(para
, html
));
507 // Remove double blanks/brks
508 fixBlanksBreaks(paras
);
515 * Convert the given line into a single {@link Paragraph}.
518 * the linked {@link BasicSupport} (can be NULL), used to
519 * download optional image content in []
521 * the source URL of the story (for image lookup in the same path
522 * if the source is a file, can be NULL)
524 * the textual content of the paragraph
526 * TRUE if the input content is in HTML mode
528 * @return the {@link Paragraph}, never NULL
530 protected Paragraph
makeParagraph(BasicSupport support
, URL source
,
531 String line
, boolean html
) {
533 if (line
.startsWith("[") && line
.endsWith("]")) {
534 image
= bsHelper
.getImage(support
, source
, line
535 .substring(1, line
.length() - 1).trim());
539 return new Paragraph(image
);
542 return processPara(line
, html
);
546 * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
547 * those {@link Paragraph}s.
549 * The resulting list will not contain a starting or trailing blank/break
550 * nor 2 blanks or breaks following each other.
553 * the list of {@link Paragraph}s to fix
555 protected void fixBlanksBreaks(List
<Paragraph
> paras
) {
556 boolean space
= false;
558 for (int i
= 0; i
< paras
.size(); i
++) {
559 Paragraph para
= paras
.get(i
);
560 boolean thisSpace
= para
.getType() == ParagraphType
.BLANK
;
561 boolean thisBrk
= para
.getType() == ParagraphType
.BREAK
;
563 if (i
> 0 && space
&& thisBrk
) {
566 } else if ((space
|| brk
) && (thisSpace
|| thisBrk
)) {
575 // Remove blank/brk at start
577 && (paras
.get(0).getType() == ParagraphType
.BLANK
|| paras
.get(
578 0).getType() == ParagraphType
.BREAK
)) {
582 // Remove blank/brk at end
583 int last
= paras
.size() - 1;
585 && (paras
.get(last
).getType() == ParagraphType
.BLANK
|| paras
586 .get(last
).getType() == ParagraphType
.BREAK
)) {