1 package be
.nikiroo
.fanfix
.supported
;
3 import java
.io
.BufferedReader
;
4 import java
.io
.ByteArrayInputStream
;
5 import java
.io
.IOException
;
6 import java
.io
.InputStreamReader
;
8 import java
.util
.ArrayList
;
11 import be
.nikiroo
.fanfix
.Instance
;
12 import be
.nikiroo
.fanfix
.bundles
.Config
;
13 import be
.nikiroo
.fanfix
.bundles
.StringId
;
14 import be
.nikiroo
.fanfix
.data
.Chapter
;
15 import be
.nikiroo
.fanfix
.data
.Paragraph
;
16 import be
.nikiroo
.fanfix
.data
.Paragraph
.ParagraphType
;
17 import be
.nikiroo
.utils
.Image
;
18 import be
.nikiroo
.utils
.Progress
;
19 import be
.nikiroo
.utils
.StringUtils
;
22 * Helper class for {@link BasicSupport}, mostly dedicated to {@link Paragraph}
23 * and text formatting for the {@link BasicSupport} class itself (not its
28 class BasicSupportPara
{
// Typographic quote characters used by requotify() and processPara().
// Each one is read from Instance.getTrans() by its static initialiser.
// NOTE(review): none of these is reassigned in the visible lines; they look
// like candidates for `final` -- confirm against the full file first.
// Opening single quote (StringId.OPEN_SINGLE_QUOTE).
30 private static char openQuote
= Instance
.getTrans().getCharacter(
31 StringId
.OPEN_SINGLE_QUOTE
);
// Closing single quote (StringId.CLOSE_SINGLE_QUOTE).
32 private static char closeQuote
= Instance
.getTrans().getCharacter(
33 StringId
.CLOSE_SINGLE_QUOTE
);
// Opening double quote (StringId.OPEN_DOUBLE_QUOTE).
34 private static char openDoubleQuote
= Instance
.getTrans().getCharacter(
35 StringId
.OPEN_DOUBLE_QUOTE
);
// Closing double quote (StringId.CLOSE_DOUBLE_QUOTE).
36 private static char closeDoubleQuote
= Instance
.getTrans().getCharacter(
37 StringId
.CLOSE_DOUBLE_QUOTE
);
40 * Create a {@link Chapter} object from the given information, formatting
41 * the content as it should be.
44 * the linked {@link BasicSupport}
46 * the source of the story
54 * the optional progress reporter
56 * TRUE if the input content is in HTML mode
58 * @return the {@link Chapter}
61 * in case of I/O error
// Builds a Chapter numbered `number` from a raw chapter name and, when
// present, raw content: the name is cleaned up first, then the content is
// converted to paragraphs and their word counts are summed.
// NOTE(review): this listing is an extract; original lines 65, 70, 72,
// 77-81, 96-97 and 103+ are not visible here, so the throws clause, the end
// of some statements and the return are not shown.
63 public static Chapter
makeChapter(BasicSupport support
, URL source
,
64 int number
, String name
, String content
, boolean html
, Progress pg
)
66 // Chapter name: process it correctly, then remove the possible
67 // redundant "Chapter x: " in front of it, or "-" (as in
68 // "Chapter 5: - Fun!" after the ": " was automatically added)
// The name is always run through processPara() with html == false.
// NOTE(review): the rest of this statement (original line 70) is not
// visible; presumably it extracts the paragraph text -- confirm.
69 String chapterName
= BasicSupportPara
.processPara(name
, false)
// Iterates over values derived from the Config.CHAPTER setting; how the
// string is turned into an iterable (original line 72) is not visible.
71 for (String lang
: Instance
.getConfig().getString(Config
.CHAPTER
)
// Per-language value of Config.CHAPTER, used below as a prefix to strip.
73 String chapterWord
= Instance
.getConfig().getStringX(
74 Config
.CHAPTER
, lang
);
// Drop the chapter word when the name starts with it.
75 if (chapterName
.startsWith(chapterWord
)) {
76 chapterName
= chapterName
.substring(chapterWord
.length())
// Drop a leading copy of the chapter number, then trim.
82 if (chapterName
.startsWith(Integer
.toString(number
))) {
83 chapterName
= chapterName
.substring(
84 Integer
.toString(number
).length()).trim();
// Strip any run of leading ':' or '-' separators, trimming after each one.
87 while (chapterName
.startsWith(":") || chapterName
.startsWith("-")) {
88 chapterName
= chapterName
.substring(1).trim();
92 Chapter chap
= new Chapter(number
, chapterName
);
// Content is optional: paragraphs and word count are only set when it is
// non-null.
94 if (content
!= null) {
95 List
<Paragraph
> paras
= makeParagraphs(support
, source
, content
,
// Sum the per-paragraph word counts.
// NOTE(review): the declaration of `words` is on a line not visible here.
98 for (Paragraph para
: paras
) {
99 words
+= para
.getWords();
101 chap
.setParagraphs(paras
);
102 chap
.setWords(words
);
109 * Check quotes for bad format (i.e., quotes with normal paragraphs inside)
110 * and requotify them (i.e., separate them into QUOTE paragraphs and other
111 * paragraphs (quotes or not)).
114 * the paragraph to requotify (not necessarily a quote)
116 * TRUE if the input content is in HTML mode
118 * @return the correctly (or so we hope) quotified paragraphs
// Splits a badly formed QUOTE paragraph into several paragraphs and returns
// them in a new list.
// NOTE(review): this listing is an extract; original lines 131, 134-137,
// 140-144, 148-149, 152-153, 155, 157-158, 160, 162-168, 171, 173-176 and
// 180+ are not visible, so several branch heads and bodies are missing.
120 private static List
<Paragraph
> requotify(Paragraph para
, boolean html
) {
121 List
<Paragraph
> newParas
= new ArrayList
<Paragraph
>();
// Only QUOTE paragraphs whose content is longer than 2 characters enter
// this branch; what happens to the others is on lines not visible here.
123 if (para
.getType() == ParagraphType
.QUOTE
124 && para
.getContent().length() > 2) {
125 String line
= para
.getContent();
// Which kind of opening quote, if any, the content starts with.
126 boolean singleQ
= line
.startsWith("" + openQuote
);
127 boolean doubleQ
= line
.startsWith("" + openDoubleQuote
);
129 // Do not try when more than one quote at a time
130 // (some stories are not easily readable if we do)
// Both tests below compare the first closing quote found from index 1 with
// the last one; "<" means there are at least two of them.
// NOTE(review): the start of each condition and both bodies are not
// visible.
132 && line
.indexOf(closeQuote
, 1) < line
133 .lastIndexOf(closeQuote
)) {
138 && line
.indexOf(closeDoubleQuote
, 1) < line
139 .lastIndexOf(closeDoubleQuote
)) {
// No opening quote at all: wrap the whole content in double quotes and add
// it as a single QUOTE paragraph.
145 if (!singleQ
&& !doubleQ
) {
146 line
= openDoubleQuote
+ line
+ closeDoubleQuote
;
147 newParas
.add(new Paragraph(ParagraphType
.QUOTE
, line
, para
// Otherwise work with the quote pair that matches the opening character.
150 char open
= singleQ ? openQuote
: openDoubleQuote
;
151 char close
= singleQ ? closeQuote
: closeDoubleQuote
;
// Scan the characters, tracking whether we are inside the quote, and react
// to a '.' found outside it.
// NOTE(review): the branch bodies, including how `posDot` is set, are not
// visible.
154 boolean inQuote
= false;
156 for (char car
: line
.toCharArray()) {
159 } else if (car
== close
) {
161 } else if (car
== '.' && !inQuote
) {
// Split just after posDot: `line` keeps the text up to and including that
// position, `rest` takes what follows; both are trimmed.
169 String rest
= line
.substring(posDot
+ 1).trim();
170 line
= line
.substring(0, posDot
+ 1).trim();
// NOTE(review): the body of this loop is not visible; `words` is used just
// below, so it presumably counts words -- confirm.
172 for (char car
: line
.toCharArray()) {
177 newParas
.add(new Paragraph(ParagraphType
.QUOTE
, line
, words
));
// The remainder is re-parsed with processPara() and requotified
// recursively.
178 if (!rest
.isEmpty()) {
179 newParas
.addAll(requotify(processPara(rest
, html
), html
));
193 * Process a {@link Paragraph} from a raw line of text.
195 * Will also fix quotes and HTML encoding if needed.
200 * TRUE if the input content is in HTML mode
202 * @return the processed {@link Paragraph}
// Turns one raw line of text into a Paragraph: the text is rebuilt character
// by character in a StringBuilder, replacing dashes and quotes with their
// typographic forms, then a ParagraphType is chosen for the result.
// NOTE(review): this listing is an extract with large gaps (for example
// original lines 205, 207, 209, 212-215, 218-219, 223-226, 289-320,
// 331-343, 363-374, 376-387); the switch header, most case labels and the
// bookkeeping for `dashCount`, `prev`, `brk` and `words` are not visible.
204 private static Paragraph
processPara(String line
, boolean html
) {
// HTML decoding through StringUtils.unhtml(), then trim.
// NOTE(review): presumably guarded by `html` on a line not visible here.
206 line
= StringUtils
.unhtml(line
).trim();
// Scanner state; `space` starts as true and both quote flags start as false.
208 boolean space
= true;
210 boolean quote
= false;
211 boolean tentativeCloseQuote
= false;
216 StringBuilder builder
= new StringBuilder();
217 for (char car
: line
.toCharArray()) {
220 // dash, ndash and mdash: - – —
221 // currently: always use mdash
// As written, a count of exactly 1 keeps a plain '-'; any other count emits
// an em dash.
222 builder
.append(dashCount
== 1 ?
'-' : '—');
// A single quote seen earlier was left undecided; it is settled now that
// the following character is known.
// NOTE(review): the conditions selecting between the two appends below are
// not fully visible.
227 if (tentativeCloseQuote
) {
228 tentativeCloseQuote
= false;
229 if (Character
.isLetterOrDigit(car
)) {
232 // handle double-single quotes as double quotes
234 builder
.append(closeDoubleQuote
);
238 builder
.append(closeQuote
);
// Whitespace cases share one branch; its condition is true only when the
// builder is non-empty and does not already end with a space.
243 case ' ': // note: unbreakable space
246 case '\n': // just in case
247 case '\r': // just in case
248 if (builder
.length() > 0
249 && builder
.charAt(builder
.length() - 1) != ' ') {
// Quote handling (case labels not visible). In this group, an opening
// quote is emitted when `space` is set or when both `brk` and `quote` are
// set. Where deleteCharAt precedes an append, the last emitted character is
// replaced by a double quote.
256 if (space
|| (brk
&& quote
)) {
258 // handle double-single quotes as double quotes
260 builder
.deleteCharAt(builder
.length() - 1);
261 builder
.append(openDoubleQuote
);
263 builder
.append(openQuote
);
265 } else if (prev
== ' ' || prev
== car
) {
266 // handle double-single quotes as double quotes
268 builder
.deleteCharAt(builder
.length() - 1);
269 builder
.append(openDoubleQuote
);
271 builder
.append(openQuote
);
274 // it is a quote ("I'm off") or a 'quote' ("This
275 // 'good' restaurant"...)
// Defer the decision to the next character (see the check near the top of
// the loop).
276 tentativeCloseQuote
= true;
// Second quote group: opens a double quote at the start of text or after a
// space; a closing double quote is appended on a branch whose head is not
// visible.
281 if (space
|| (brk
&& quote
)) {
283 builder
.append(openDoubleQuote
);
284 } else if (prev
== ' ') {
285 builder
.append(openDoubleQuote
);
287 builder
.append(closeDoubleQuote
);
// Further quote groups follow (original lines 321-375); their case labels
// are not visible, so which input characters they handle cannot be told
// from here.
321 if (space
|| (brk
&& quote
)) {
323 builder
.append(openQuote
);
325 // handle double-single quotes as double quotes
327 builder
.deleteCharAt(builder
.length() - 1);
328 builder
.append(openDoubleQuote
);
330 builder
.append(openQuote
);
344 // handle double-single quotes as double quotes
346 builder
.deleteCharAt(builder
.length() - 1);
347 builder
.append(closeDoubleQuote
);
349 builder
.append(closeQuote
);
358 if (space
|| (brk
&& quote
)) {
360 builder
.append(openDoubleQuote
);
362 builder
.append(openDoubleQuote
);
375 builder
.append(closeDoubleQuote
);
// An undecided quote still pending at this point is emitted as a closing
// single quote.
388 if (tentativeCloseQuote
) {
389 tentativeCloseQuote
= false;
390 builder
.append(closeQuote
);
393 line
= builder
.toString().trim();
// The type starts as NORMAL and is switched to BLANK, BREAK or QUOTE.
// NOTE(review): the conditions for each switch are not visible.
395 ParagraphType type
= ParagraphType
.NORMAL
;
397 type
= ParagraphType
.BLANK
;
399 type
= ParagraphType
.BREAK
;
401 type
= ParagraphType
.QUOTE
;
// NOTE(review): how `words` is computed is not visible in this extract.
404 return new Paragraph(type
, line
, words
);
408 * Convert the given content into {@link Paragraph}s.
411 * the linked {@link BasicSupport}
413 * the source URL of the story
415 * the textual content
417 * TRUE if the input content is in HTML mode
419 * the optional progress reporter
421 * @return the {@link Paragraph}s
423 * @throws IOException
424 * in case of I/O error
// Converts raw chapter content into a list of Paragraphs: the content is
// cut into lines, each line goes through makeParagraph(), then the
// paragraphs are passed to requotify() and fixBlanksBreaks().
// NOTE(review): this listing is an extract; original lines 428-433,
// 436-438, 440, 442, 445, 449, 451-453, 456, 461, 463-469, 471, 475,
// 477-483, 488-490 and 493+ are not visible, so the throws clause, the
// html/non-html branch heads, the exception handling and the return are not
// shown.
// NOTE(review): `pg` is used without a visible null check in this extract;
// confirm that a hidden line supplies a default when it is null.
426 private static List
<Paragraph
> makeParagraphs(BasicSupport support
,
427 URL source
, String content
, boolean html
, Progress pg
)
434 // Special <HR> processing:
// Every <hr> variant is replaced; the replacement text is on a line not
// visible here.
// NOTE(review): `content` is dereferenced here, before the null test at
// original line 441 -- confirm a guard on the hidden lines covers null.
435 content
= content
.replaceAll("(<hr [^>]*>)|(<hr/>)|(<hr>)",
439 List
<Paragraph
> paras
= new ArrayList
<Paragraph
>();
// Null or blank content skips this whole branch.
441 if (content
!= null && !content
.trim().isEmpty()) {
// First variant: cut on <p>, </p>, <br> and <br/> tags.
// NOTE(review): presumably the html branch -- its head is not visible.
443 String
[] tab
= content
.split("(<p>|</p>|<br>|<br/>)");
444 pg
.setMinMax(0, tab
.length
);
446 for (String line
: tab
) {
// A line wrapped in [ ] is reported as an image on the progress reporter.
// NOTE(review): `i` is declared and updated on lines not visible here.
447 if (line
.startsWith("[") && line
.endsWith("]")) {
448 pg
.setName("Extracting image " + i
);
450 paras
.add(makeParagraph(support
, source
, line
.trim(), html
));
// Second variant: read the content line by line through a BufferedReader
// over its UTF-8 bytes, collecting the trimmed lines.
454 List
<String
> lines
= new ArrayList
<String
>();
455 BufferedReader buff
= null;
457 buff
= new BufferedReader(
458 new InputStreamReader(new ByteArrayInputStream(
459 content
.getBytes("UTF-8")), "UTF-8"));
460 for (String line
= buff
.readLine(); line
!= null; line
= buff
462 lines
.add(line
.trim());
470 pg
.setMinMax(0, lines
.size());
// Same per-line handling as the first variant; the lines were already
// trimmed when collected.
472 for (String line
: lines
) {
473 if (line
.startsWith("[") && line
.endsWith("]")) {
474 pg
.setName("Extracting image " + i
);
476 paras
.add(makeParagraph(support
, source
, line
, html
));
484 // Check quotes for "bad" format
// Each paragraph may expand into several through requotify().
485 List
<Paragraph
> newParas
= new ArrayList
<Paragraph
>();
486 for (Paragraph para
: paras
) {
487 newParas
.addAll(BasicSupportPara
.requotify(para
, html
));
491 // Remove double blanks/brks
// NOTE(review): what is done with `newParas` between original lines 487 and
// 492 is not visible; confirm it replaces `paras` before this call.
492 fixBlanksBreaks(paras
);
499 * Convert the given line into a single {@link Paragraph}.
502 * the linked {@link BasicSupport}
504 * the source URL of the story
506 * the textual content of the paragraph
508 * TRUE if the input content is in HTML mode
510 * @return the {@link Paragraph}
// Converts a single line into one Paragraph: either an image paragraph for
// a line of the form "[...]", or a text paragraph built by processPara().
// NOTE(review): this listing is an extract; original lines 514, 518-520,
// 522-523 and 525+ are not visible, so the declaration of `image` and the
// condition guarding the first return are not shown.
512 private static Paragraph
makeParagraph(BasicSupport support
, URL source
,
513 String line
, boolean html
) {
// A line wrapped in square brackets is treated as an image reference: the
// trimmed text between the brackets is handed to
// BasicSupportHelper.getImage().
515 if (line
.startsWith("[") && line
.endsWith("]")) {
516 image
= BasicSupportHelper
.getImage(support
, source
, line
517 .substring(1, line
.length() - 1).trim());
// NOTE(review): presumably only reached when an image was actually found --
// the guarding line is not visible.
521 return new Paragraph(image
);
// Otherwise the line is processed as text.
524 return BasicSupportPara
.processPara(line
, html
);
528 * Fix the {@link ParagraphType#BLANK}s and {@link ParagraphType#BREAK}s of
529 * those {@link Paragraph}s.
531 * The resulting list will not contain a starting or trailing blank/break
532 * nor 2 blanks or breaks following each other.
535 * the list of {@link Paragraph}s to fix
537 private static void fixBlanksBreaks(List
<Paragraph
> paras
) {
538 boolean space
= false;
540 for (int i
= 0; i
< paras
.size(); i
++) {
541 Paragraph para
= paras
.get(i
);
542 boolean thisSpace
= para
.getType() == ParagraphType
.BLANK
;
543 boolean thisBrk
= para
.getType() == ParagraphType
.BREAK
;
545 if (i
> 0 && space
&& thisBrk
) {
548 } else if ((space
|| brk
) && (thisSpace
|| thisBrk
)) {
557 // Remove blank/brk at start
559 && (paras
.get(0).getType() == ParagraphType
.BLANK
|| paras
.get(
560 0).getType() == ParagraphType
.BREAK
)) {
564 // Remove blank/brk at end
565 int last
= paras
.size() - 1;
567 && (paras
.get(last
).getType() == ParagraphType
.BLANK
|| paras
568 .get(last
).getType() == ParagraphType
.BREAK
)) {