nsub: add SRT read support
authorNiki Roo <niki@nikiroo.be>
Sun, 15 May 2022 11:22:05 +0000 (13:22 +0200)
committerNiki Roo <niki@nikiroo.be>
Sun, 15 May 2022 11:22:05 +0000 (13:22 +0200)
README-fr.md
README.md
src/nsub/nsub.c
src/nsub/nsub_read_srt.c [new file with mode: 0644]
src/nsub/nsub_write_lrc.c
src/nsub/nsub_write_srt.c
src/nsub/nsub_write_webvtt.c

index 6743527b65dfe2319c78b8e0a602527af7105a0f..e665ab6e8dd1ebbaaaa625b51d58249bda233e95 100644 (file)
@@ -25,7 +25,7 @@ Il ne nécessite pas de librairies externes.
 - `SRT` fichiers sous-titres SubRip, ils accompagnent en général des films
 - `WebVTT` Web Video Text Tracks, un nouveau standard W3C
 
-Note : ce programme ne peut pas encore ouvrir des fichiers WebVTT ni SRT (il supporte toutefois les 3 formats en écriture)
+Note : ce programme ne peut pas encore ouvrir des fichiers WebVTT (il supporte toutefois les 3 formats en écriture)
 
 ## Options
 
index 295f017f6fe31610c96f531ddd488138f79d0f29..7642dcc3b64b2fec4b556ddb310be4da876152a3 100644 (file)
--- a/README.md
+++ b/README.md
@@ -25,7 +25,7 @@ It does not require external libraries.
 - `SRT` SubRip subtitle files, usually distributed with films
 - `WebVTT` Web Video Text Tracks, a new W3C standard
 
-Note: this program can not yet open WebVTT nor SRT files (it supports all 3 formats as output, though)
+Note: this program can not yet open WebVTT files (it supports all 3 formats as output, though)
 
 ## Options
 
index d5d39cbe50e93a07c96ebe0442c05591a80a6899..221aeb17e75229aad9fb93badfc7df2e57aa602a 100644 (file)
@@ -103,25 +103,32 @@ void song_add_meta(song_t *song, char *key, char *value) {
 }
 
 song_t *nsub_read(FILE *in, NSUB_FORMAT fmt) {
-       song_t *song = new_song();
+       song_t *song = NULL;
+       cstring_t *line = NULL;
 
-       cstring_t *line = new_cstring();
+       /* Which reader? */
+       int (*read_a_line)(song_t *, char *) = NULL;
+       switch (fmt) {
+       case NSUB_FMT_LRC:
+               read_a_line = nsub_read_lrc;
+               break;
+       case NSUB_FMT_SRT:
+               read_a_line = nsub_read_srt;
+               break;
+       default:
+               fprintf(stderr, "Unsupported read format %d\n", fmt);
+               goto fail;
+       }
+
+       /* Read it */
+       song = new_song();
+       line = new_cstring();
        size_t i = 0;
        while (cstring_readline(line, in)) {
                i++;
 
-               switch (fmt) {
-               case NSUB_FMT_LRC:
-                       if (!nsub_read_lrc(song, line->string)) {
-                               fprintf(stderr, "Read error on line %zu: <%s>\n", i,
-                                               line->string);
-                               song = NULL;
-                               goto fail;
-                       }
-                       break;
-               default:
-                       fprintf(stderr, "Unsupported read format %d\n", fmt);
-                       free_song(song);
+               if (!read_a_line(song, line->string)) {
+                       fprintf(stderr, "Read error on line %zu: <%s>\n", i, line->string);
                        song = NULL;
                        goto fail;
                }
diff --git a/src/nsub/nsub_read_srt.c b/src/nsub/nsub_read_srt.c
new file mode 100644 (file)
index 0000000..23ed04f
--- /dev/null
@@ -0,0 +1,266 @@
+/*
+ * NSub: Subtitle/Lyrics conversion program (webvtt/srt/lrc)
+ *
+ * Copyright (C) 2022 Niki Roo
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "nsub.h"
+#include "utils/utils.h"
+
+/* Declarations */
+
+static int is_srt_id(const char line[]); /* 1 if line holds only digits and spaces */
+static int is_srt_timing(const char line[]); /* 1 if line looks like "start --> stop" */
+static int get_start(const char line[]); /* start time of a timing line, in ms */
+static int get_stop(const char line[]); /* stop time of a timing line, in ms */
+static int to_ms(const char line[]); /* "00:00:17,400"-style timestamp to ms */
+
+/**
+ * Feed one line of an SRT (SubRip) file into the song being built.
+ *
+ * - a blank line (empty, or spaces only) is skipped;
+ * - a line made only of digits and spaces opens a new, empty lyric entry;
+ * - a timing line sets the start/stop (in milliseconds) of the latest entry;
+ * - any other line is appended to the text of the latest entry, successive
+ *   text lines being joined with "\n".
+ *
+ * NOTE(review): a subtitle text line made only of digits cannot be told apart
+ * from an entry number here, and will open a new entry.
+ *
+ * @param song the song to fill
+ * @param line the line to process
+ *
+ * @return 1 if the line was accepted, 0 if a timing or text line was found
+ *         before any entry number
+ */
+int nsub_read_srt(song_t *song, char *line) {
+       /* Blank lines only separate the entries, nothing to store */
+       int empty = 1;
+       for (size_t i = 0; empty && line[i]; i++) {
+               if (line[i] != ' ')
+                       empty = 0;
+       }
+
+       if (empty)
+               return 1;
+
+       size_t count = array_count(song->lyrics);
+       lyric_t *lyric = NULL;
+       if (count)
+               lyric = array_get(song->lyrics, count - 1);
+
+       if (is_srt_id(line)) {
+               /* strtoul: defined on overflow, and keeps the comparison unsigned */
+               unsigned long id = strtoul(line, NULL, 10);
+               size_t expected = count + 1;
+
+               /* The number is informative only: entries are kept in file order */
+               if (id != expected) {
+                       fprintf(stderr,
+                                       "Warning: line %zu is out of order (it is numbered %lu), ignoring order...\n",
+                                       expected, id);
+               }
+
+               song_add_lyric(song, 0, 0, NULL, NULL);
+       } else if (is_srt_timing(line)) {
+               if (!lyric) {
+                       return 0;
+               }
+
+               lyric->start = get_start(line);
+               lyric->stop = get_stop(line);
+       } else {
+               if (!lyric) {
+                       return 0;
+               }
+
+               /* Build the new text first, then release the previous one */
+               char *text = lyric->text;
+               if (text)
+                       text = cstring_concat(text, "\n", line);
+               else
+                       text = strdup(line);
+
+               free(lyric->text);
+               lyric->text = text;
+       }
+
+       return 1;
+}
+
+/* Private */
+
+/**
+ * Check if the line can be an SRT entry number.
+ *
+ * @param line the line to check
+ *
+ * @return 1 if every character is a decimal digit or a space (an empty
+ *         string also qualifies), 0 as soon as anything else is found
+ */
+static int is_srt_id(const char line[]) {
+       for (size_t pos = 0; line[pos] != '\0'; pos++) {
+               char c = line[pos];
+               int is_digit = (c >= '0' && c <= '9');
+
+               if (!is_digit && c != ' ')
+                       return 0;
+       }
+
+       return 1;
+}
+
+/**
+ * Check if the line looks like an SRT timing line ("start --> stop").
+ *
+ * The line is scanned once while counting the digits of the current group
+ * (groups are closed by ':' or ','), the groups of the current timestamp and
+ * the characters of the "-->" separator.
+ *
+ * NOTE(review): characters not handled by the switch (letters, '.', ...) are
+ * silently skipped, so they do not make the line invalid.
+ * NOTE(review): the last group of the second timestamp is never added to
+ * vals_groups, so the two timestamps are not held to the same group limits.
+ *
+ * @param line the line to check
+ *
+ * @return 1 if the counts are acceptable and the separator is exactly
+ *         two '-' followed by '>', 0 otherwise
+ */
+static int is_srt_timing(const char line[]) {
+       // Canonical example:
+       // 00:00:14,800 --> 00:00:17,400
+
+       int vals = 0; // digits in the current group
+       int vals_groups = 0; // groups closed in the current timestamp
+       int sep = 0; // characters of "-->" seen so far
+       int deci = 0; // a ',' was seen in the current timestamp
+
+       for (char *ptr = (char *) line; *ptr; ptr++) {
+               switch (*ptr) {
+               case ' ': // ignore space, except right after the first '-'
+                       if (sep && sep < 2)
+                               return 0;
+                       break;
+
+               case '0': // count a new numeric
+               case '1':
+               case '2':
+               case '3':
+               case '4':
+               case '5':
+               case '6':
+               case '7':
+               case '8':
+               case '9':
+                       vals++;
+                       break;
+
+               case ',': // we just did a group
+               case ':':
+                       if (*ptr == ',')
+                               deci = 1;
+
+                       if (vals < 1 || vals > 2)
+                               return 0;
+
+                       vals = 0;
+                       vals_groups++;
+                       break;
+
+               case '-': // Separator (-->)
+                       if (!sep) {
+                               // First '-': close and check the first timestamp
+                               vals_groups++;
+
+                               if (vals < 1 || (vals > 2 && !deci) || vals > 3)
+                                       return 0;
+
+                               if (vals_groups < 1 || (vals_groups > 3 && !deci)
+                                               || vals_groups > 4)
+                                       return 0;
+
+                               vals = 0;
+                               vals_groups = 0;
+                               deci = 0;
+                       }
+
+                       // Only two '-' are allowed: a third one would bring sep to 3
+                       // and let "---" pass as a full separator without any '>'
+                       if (sep >= 2)
+                               return 0;
+
+                       sep++;
+                       break;
+
+               case '>': // Separator (-->)
+                       if (sep != 2)
+                               return 0;
+
+                       sep++;
+                       break;
+               }
+       }
+
+       // Last group of the second timestamp
+       if (vals < 1 || (vals > 2 && !deci) || vals > 3)
+               return 0;
+
+       if (vals_groups < 1 || (vals_groups > 3 && !deci) || vals_groups > 4)
+               return 0;
+
+       if (sep != 3)
+               return 0;
+
+       return 1;
+}
+
+/**
+ * Extract the start time of an SRT timing line.
+ *
+ * Leading spaces are skipped, then everything up to the first space or '-'
+ * is copied and converted by to_ms().
+ *
+ * @param line a timing line (it must contain a space or a '-' after the
+ *             start timestamp, the scan does not stop on the end of string)
+ *
+ * @return the start time in milliseconds
+ */
+static int get_start(const char line[]) {
+       char *begin = (char *) line;
+       for (; *begin == ' '; begin++)
+               ;
+
+       size_t len = 0;
+       while (begin[len] != ' ' && begin[len] != '-')
+               len++;
+
+       cstring_t *stamp = cstring_substring(begin, 0, len);
+       int result = to_ms(stamp->string);
+       free_cstring(stamp);
+
+       return result;
+}
+
+/**
+ * Extract the stop time of an SRT timing line.
+ *
+ * The stop timestamp is what follows the first '>' of the line (spaces right
+ * after it are skipped); it is converted by to_ms().
+ *
+ * @param line a timing line
+ *
+ * @return the stop time in milliseconds, or 0 if the line has no '>'
+ */
+static int get_stop(const char line[]) {
+       const char *ptr = line;
+       while (*ptr && *ptr != '>')
+               ptr++;
+
+       // No separator at all: do not walk past the end of the string
+       if (!*ptr)
+               return 0;
+
+       ptr++;
+       while (*ptr == ' ')
+               ptr++;
+
+       return to_ms(ptr);
+}
+
+/**
+ * Convert a timestamp such as "00:00:17,400" to milliseconds.
+ *
+ * The string is read backwards from its end (trailing spaces, tabs and
+ * end-of-line characters are skipped first), one group of digits at a time,
+ * until a character that is not a digit, ',' or ':' is found.
+ * If a ',' was seen, the groups are, from the right: milliseconds, seconds,
+ * minutes, hours. Without ',', they are: seconds, minutes, hours.
+ *
+ * Parsing stops on a fifth group or on a fourth digit within a group (what
+ * was read so far is kept); a fourth group without ',' has no unit and is
+ * ignored.
+ *
+ * @param line the timestamp, possibly preceded by other characters
+ *
+ * @return the time in milliseconds (0 for an empty string)
+ */
+static int to_ms(const char line[]) {
+       // 00:00:17,400
+
+       enum {
+               MAX_GROUPS = 4, MAX_DIGITS = 3
+       };
+
+       static const int mults[MAX_GROUPS] = { 1, 1000, 60000, 3600000 };
+       static const int pow10[MAX_DIGITS] = { 1, 10, 100 };
+
+       int group[MAX_GROUPS] = { 0, 0, 0, 0 };
+       int igroup = -1;
+       int idigit = 0;
+
+       int has_milli = 0;
+
+       size_t len = strlen(line);
+       while (len > 0
+                       && (line[len - 1] == ' ' || line[len - 1] == '\t'
+                                       || line[len - 1] == '\r' || line[len - 1] == '\n'))
+               len--;
+
+       // "i-- > 0": an unsigned index is always >= 0, so test before decrementing
+       for (size_t i = len; i-- > 0;) {
+               char car = line[i];
+
+               int digit = (car >= '0' && car <= '9');
+               int dot = (car == ',');
+               int col = (car == ':');
+
+               if (!digit && !dot && !col) {
+                       break;
+               }
+
+               if (digit) {
+                       if (idigit == 0) {
+                               if (igroup + 1 >= MAX_GROUPS)
+                                       break; // too many groups for group[]
+
+                               igroup++;
+                       }
+
+                       if (idigit >= MAX_DIGITS)
+                               break; // too many digits for pow10[]
+
+                       group[igroup] += pow10[idigit] * (car - '0');
+                       idigit++;
+               } else {
+                       if (dot)
+                               has_milli = 1;
+
+                       idigit = 0;
+               }
+       }
+
+       // Without milliseconds, the rightmost group is the seconds
+       int total = 0;
+       int offset = (has_milli ? 0 : 1);
+       for (int i = 0; i <= igroup && i + offset < MAX_GROUPS; i++) {
+               total += mults[i + offset] * group[i];
+       }
+
+       return total;
+}
index b8bf43868f5ed292aefcf3f7f15e79a7eaa196c5..a7224b23462fe99c6b305fb90cabedafa7662a9f 100644 (file)
@@ -34,13 +34,9 @@ int nsub_write_lrc(FILE *out, song_t *song, NSUB_FORMAT fmt, int apply_offset) {
        // header: none
 
        // metas
+       array_loop(song->metas, meta, meta_t)
        {
-               size_t count = array_count(song->metas);
-               meta_t *meta;
-               for (size_t i = 0; i < count; i++) {
-                       meta = (meta_t *) array_get(song->metas, i);
-                       fprintf(out, "[%s: %s]\n", meta->key, meta->value);
-               }
+               fprintf(out, "[%s: %s]\n", meta->key, meta->value);
        }
 
        // offset
@@ -65,13 +61,9 @@ int nsub_write_lrc(FILE *out, song_t *song, NSUB_FORMAT fmt, int apply_offset) {
        }
 
        // lyrics
+       array_loop(song->lyrics, lyric, lyric_t)
        {
-               size_t count = array_count(song->lyrics);
-               lyric_t *lyric;
-               for (size_t i = 0; i < count; i++) {
-                       lyric = (lyric_t*) array_get(song->lyrics, i);
-                       nsub_write_lrc_lyric(out, lyric, offset);
-               }
+               nsub_write_lrc_lyric(out, lyric, offset);
        }
 
        return 1;
index 742d582344d567d224024003b7247e19ec09e64c..57743a98f2845ca7f6681b3f16adba9646e9c128 100644 (file)
@@ -43,13 +43,9 @@ int nsub_write_srt(FILE *out, song_t *song, NSUB_FORMAT fmt, int apply_offset) {
        // other metas: none
 
        // lyrics
+       array_loop(song->lyrics, lyric, lyric_t)
        {
-               size_t count = array_count(song->lyrics);
-               lyric_t *lyric;
-               for (size_t i = 0; i < count; i++) {
-                       lyric = (lyric_t*) array_get(song->lyrics, i);
-                       nsub_write_srt_lyric(out, lyric, offset);
-               }
+               nsub_write_srt_lyric(out, lyric, offset);
        }
 
        return 1;
index 177d6ae99aeda3a9e7da00720f41b9a2d415c39f..b73090bfb2eb64a16068f0f0bb7d917fba67039b 100644 (file)
@@ -41,13 +41,9 @@ int nsub_write_webvtt(FILE *out, song_t *song, NSUB_FORMAT fmt,
        }
 
        // metas
+       array_loop(song->metas, meta, meta_t)
        {
-               size_t count = array_count(song->metas);
-               meta_t *meta;
-               for (size_t i = 0; i < count; i++) {
-                       meta = (meta_t *) array_get(song->metas, i);
-                       fprintf(out, "NOTE META %s: %s\n\n", meta->key, meta->value);
-               }
+               fprintf(out, "NOTE META %s: %s\n\n", meta->key, meta->value);
        }
 
        // offset is not supported in WebVTT (so, always applied)
@@ -62,13 +58,9 @@ int nsub_write_webvtt(FILE *out, song_t *song, NSUB_FORMAT fmt,
        }
 
        // lyrics
+       array_loop(song->lyrics, lyric, lyric_t)
        {
-               size_t count = array_count(song->lyrics);
-               lyric_t *lyric;
-               for (size_t i = 0; i < count; i++) {
-                       lyric = (lyric_t*) array_get(song->lyrics, i);
-                       nsub_write_webvtt_lyric(out, lyric, offset);
-               }
+               nsub_write_webvtt_lyric(out, lyric, offset);
        }
 
        return 1;