From a051d84d37f91d42f20a999f975adde3b4d1dcee Mon Sep 17 00:00:00 2001 From: Niki Roo Date: Sun, 15 May 2022 13:22:05 +0200 Subject: [PATCH] nsub: add SRT read support --- README-fr.md | 2 +- README.md | 2 +- src/nsub/nsub.c | 35 +++-- src/nsub/nsub_read_srt.c | 266 +++++++++++++++++++++++++++++++++++ src/nsub/nsub_write_lrc.c | 16 +-- src/nsub/nsub_write_srt.c | 8 +- src/nsub/nsub_write_webvtt.c | 16 +-- 7 files changed, 299 insertions(+), 46 deletions(-) create mode 100644 src/nsub/nsub_read_srt.c diff --git a/README-fr.md b/README-fr.md index 6743527..e665ab6 100644 --- a/README-fr.md +++ b/README-fr.md @@ -25,7 +25,7 @@ Il ne nécessite pas de librairies externes. - `SRT` fichiers sous-titres SubRip, ils accompagnent en général des films - `WebVTT` Web Video Text Tracks, un nouveau standard W3C -Note : ce programme ne peut pas encore ouvrir des fichiers WebVTT ni SRT (il supporte toutefois les 3 formats en écriture) +Note : ce programme ne peut pas encore ouvrir des fichiers WebVTT (il supporte toutefois les 3 formats en écriture) ## Options diff --git a/README.md b/README.md index 295f017..7642dcc 100644 --- a/README.md +++ b/README.md @@ -25,7 +25,7 @@ It does not require external libraries. - `SRT` SubRip subtitle files, usually distributed with films - `WebVTT` Web Video Text Tracks, a new W3C standard -Note: this program can not yet open WebVTT nor SRT files (it supports all 3 formats as output, though) +Note: this program can not yet open WebVTT files (it supports all 3 formats as output, though) ## Options diff --git a/src/nsub/nsub.c b/src/nsub/nsub.c index d5d39cb..221aeb1 100644 --- a/src/nsub/nsub.c +++ b/src/nsub/nsub.c @@ -103,25 +103,32 @@ void song_add_meta(song_t *song, char *key, char *value) { } song_t *nsub_read(FILE *in, NSUB_FORMAT fmt) { - song_t *song = new_song(); + song_t *song = NULL; + cstring_t *line = NULL; - cstring_t *line = new_cstring(); + /* Which reader? 
*/ + int (*read_a_line)(song_t *, char *) = NULL; + switch (fmt) { + case NSUB_FMT_LRC: + read_a_line = nsub_read_lrc; + break; + case NSUB_FMT_SRT: + read_a_line = nsub_read_srt; + break; + default: + fprintf(stderr, "Unsupported read format %d\n", fmt); + goto fail; + } + + /* Read it */ + song = new_song(); + line = new_cstring(); size_t i = 0; while (cstring_readline(line, in)) { i++; - switch (fmt) { - case NSUB_FMT_LRC: - if (!nsub_read_lrc(song, line->string)) { - fprintf(stderr, "Read error on line %zu: <%s>\n", i, - line->string); - song = NULL; - goto fail; - } - break; - default: - fprintf(stderr, "Unsupported read format %d\n", fmt); - free_song(song); + if (!read_a_line(song, line->string)) { + fprintf(stderr, "Read error on line %zu: <%s>\n", i, line->string); song = NULL; goto fail; } diff --git a/src/nsub/nsub_read_srt.c b/src/nsub/nsub_read_srt.c new file mode 100644 index 0000000..23ed04f --- /dev/null +++ b/src/nsub/nsub_read_srt.c @@ -0,0 +1,266 @@ +/* + * NSub: Subtitle/Lyrics conversion program (webvtt/srt/lrc) + * + * Copyright (C) 2022 Niki Roo + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */
+
+#include <stdlib.h>
+#include <string.h>
+
+#include "nsub.h"
+#include "utils/utils.h"
+
+/* Declarations */
+
+static int is_srt_id(const char line[]);
+static int is_srt_timing(const char line[]);
+static int is_timestamp_shape(int digits, int groups, int has_milli);
+static int get_start(const char line[]);
+static int get_stop(const char line[]);
+static int to_ms(const char line[]);
+
+/**
+ * Feed one line of an SRT file into the given song.
+ *
+ * Blank lines are skipped, a numeric line opens a new (empty) lyric, a timing
+ * line sets the start/stop of the last lyric and any other line is appended
+ * to the text of the last lyric (lines are joined with a line feed).
+ *
+ * @return 1 if the line was consumed, 0 if a timing or text line shows up
+ *         before any ID line
+ */
+int nsub_read_srt(song_t *song, char *line) {
+	// Nothing but spaces: a separator between two lyrics, nothing to store
+	if (!line[strspn(line, " ")])
+		return 1;
+
+	size_t count = array_count(song->lyrics);
+	lyric_t *lyric = NULL;
+	if (count)
+		lyric = array_get(song->lyrics, count - 1);
+
+	if (is_srt_id(line)) {
+		// strtol (not atoi): well defined even if the value does not fit
+		long new_count = strtol(line, NULL, 10);
+
+		if ((size_t) new_count != count + 1) {
+			fprintf(stderr,
+					"Warning: line %zu is out of order (it is numbered %li), ignoring order...\n",
+					count, new_count);
+		}
+
+		song_add_lyric(song, 0, 0, NULL, NULL);
+	} else if (is_srt_timing(line)) {
+		if (!lyric)
+			return 0;
+
+		lyric->start = get_start(line);
+		lyric->stop = get_stop(line);
+	} else {
+		if (!lyric)
+			return 0;
+
+		char *text = lyric->text;
+		if (text)
+			text = cstring_concat(text, "\n", line);
+		else
+			text = strdup(line);
+
+		free(lyric->text);
+		lyric->text = text;
+	}
+
+	return 1;
+}
+
+/* Private */
+
+/**
+ * Check if the line is a lyric ID: nothing but digits and spaces.
+ *
+ * @return 1 if it is, 0 if not
+ */
+static int is_srt_id(const char line[]) {
+	return !line[strspn(line, "0123456789 ")];
+}
+
+/**
+ * Check if the line looks like an SRT timing line.
+ *
+ * Canonical example:
+ * 	00:00:14,800 --> 00:00:17,400
+ *
+ * Characters other than digits, spaces, ':', ',', '-' and '>' are ignored.
+ *
+ * @return 1 if it does, 0 if not
+ */
+static int is_srt_timing(const char line[]) {
+	int vals = 0; // digits in the current group
+	int vals_groups = 0; // groups closed by a ':' or a ',' so far
+	int sep = 0; // how many characters of "-->" were seen
+	int deci = 0; // the current timestamp has a ','
+
+	for (const char *ptr = line; *ptr; ptr++) {
+		char car = *ptr;
+
+		if (car >= '0' && car <= '9') {
+			vals++;
+		} else if (car == ' ') {
+			// No space allowed between the two '-' of the separator
+			if (sep == 1)
+				return 0;
+		} else if (car == ',' || car == ':') {
+			// We just did a group (only the last one can have 3 digits)
+			if (car == ',')
+				deci = 1;
+
+			if (vals < 1 || vals > 2)
+				return 0;
+
+			vals = 0;
+			vals_groups++;
+		} else if (car == '-') {
+			// The separator has two '-' only, get_stop() needs the '>'
+			if (sep >= 2)
+				return 0;
+
+			if (!sep) {
+				// First '-': the start timestamp is complete, check it
+				if (!is_timestamp_shape(vals, vals_groups + 1, deci))
+					return 0;
+
+				vals = 0;
+				vals_groups = 0;
+				deci = 0;
+			}
+
+			sep++;
+		} else if (car == '>') {
+			if (sep != 2)
+				return 0;
+
+			sep++;
+		}
+	}
+
+	if (sep != 3)
+		return 0;
+
+	// The stop timestamp is only complete at the end of the line
+	return is_timestamp_shape(vals, vals_groups + 1, deci);
+}
+
+/**
+ * Check that a complete timestamp is at most HH:MM:SS (or HH:MM:SS,mmm).
+ * @param digits the number of digits in the last group
+ * @param groups the number of groups of digits, last one included
+ * @param has_milli 1 if a ',' was seen in the timestamp, 0 if not
+ * @return 1 if the shape is one that to_ms() can store, 0 if not
+ */
+static int is_timestamp_shape(int digits, int groups, int has_milli) {
+	if (digits < 1 || digits > (has_milli ? 3 : 2))
+		return 0;
+
+	return groups <= (has_milli ? 4 : 3);
+}
+
+/**
+ * Extract the start time (the timestamp before the "-->") of a timing line.
+ *
+ * @return the start time in milliseconds
+ */
+static int get_start(const char line[]) {
+	char *ptr = (char *) line;
+	while (*ptr == ' ')
+		ptr++;
+
+	// The timestamp ends on a space, on the separator or with the line
+	size_t i = strcspn(ptr, " -");
+
+	cstring_t *start = cstring_substring(ptr, 0, i);
+	int ms = to_ms(start->string);
+	free_cstring(start);
+	return ms;
+}
+
+/**
+ * Extract the stop time (the timestamp after the "-->") of a timing line.
+ *
+ * @return the stop time in milliseconds, 0 if the line has no '>'
+ */
+static int get_stop(const char line[]) {
+	const char *ptr = strchr(line, '>');
+	if (!ptr)
+		return 0;
+
+	ptr++;
+	while (*ptr == ' ')
+		ptr++;
+
+	return to_ms(ptr);
+}
+
+/**
+ * Convert a timestamp such as 00:00:17,400 into milliseconds.
+ *
+ * The text is read backwards from its end; the conversion stops on the first
+ * character that is not a digit, a ':' or a ','.
+ *
+ * @return the time in milliseconds (0 if no digit was read)
+ */
+static int to_ms(const char line[]) {
+	enum { MAX_GROUPS = 4, MAX_DIGITS = 3 };
+	// Milliseconds per unit of a group / weight of a digit, last one first
+	static const int mults[MAX_GROUPS] = { 1, 1000, 60000, 3600000 };
+	static const int mtmp[MAX_DIGITS] = { 1, 10, 100 };
+
+	int group[MAX_GROUPS] = { 0, 0, 0, 0 };
+	int igroup = -1;
+	int itmp = 0;
+	int has_milli = 0;
+
+	// Not "i >= 0": a size_t is never negative, the loop would not stop
+	for (size_t i = strlen(line); i-- > 0;) {
+		char car = line[i];
+
+		if (car >= '0' && car <= '9') {
+			// More digits or groups than we can store: stop there
+			if (itmp >= MAX_DIGITS || (!itmp && igroup + 1 >= MAX_GROUPS))
+				break;
+
+			if (itmp == 0)
+				igroup++;
+
+			group[igroup] += mtmp[itmp] * (car - '0');
+			itmp++;
+		} else if (car == ',' || car == ':') {
+			if (car == ',')
+				has_milli = 1;
+
+			itmp = 0;
+		} else {
+			break;
+		}
+	}
+
+	int total = 0;
+	int multOffset = (has_milli ? 0 : 1);
+	for (int i = 0; i <= igroup && i + multOffset < MAX_GROUPS; i++) {
+		total += mults[i + multOffset] * group[i];
+	}
+
+	return total;
+}
diff --git a/src/nsub/nsub_write_lrc.c b/src/nsub/nsub_write_lrc.c
index b8bf438..a7224b2 100644
--- a/src/nsub/nsub_write_lrc.c
+++ b/src/nsub/nsub_write_lrc.c
@@ -34,13 +34,9 @@ int nsub_write_lrc(FILE *out, song_t *song, NSUB_FORMAT fmt, int apply_offset) {
 	// header: none
 
 	// metas
+	array_loop(song->metas, meta, meta_t)
 	{
-		size_t count = array_count(song->metas);
-		meta_t *meta;
-		for (size_t i = 0; i < count; i++) {
-			meta = (meta_t *) array_get(song->metas, i);
-			fprintf(out, "[%s: %s]\n", meta->key, meta->value);
-		}
+		fprintf(out, "[%s: %s]\n", meta->key, meta->value);
 	}
 
 	// offset
@@ -65,13 +61,9 @@ int nsub_write_lrc(FILE *out, song_t *song, NSUB_FORMAT fmt, int apply_offset) {
 	}
 
 	// lyrics
+	array_loop(song->lyrics, lyric, lyric_t)
 	{
-		size_t count = array_count(song->lyrics);
-		lyric_t *lyric;
-		for (size_t i = 0; i < count; i++) {
-			lyric = (lyric_t*) array_get(song->lyrics, i);
-			nsub_write_lrc_lyric(out, lyric, offset);
-		}
+		nsub_write_lrc_lyric(out, lyric, offset);
 	}
 
 	return 1;
diff --git a/src/nsub/nsub_write_srt.c b/src/nsub/nsub_write_srt.c
index 742d582..57743a9 100644
--- a/src/nsub/nsub_write_srt.c
+++ b/src/nsub/nsub_write_srt.c
@@ -43,13 +43,9 @@ int nsub_write_srt(FILE *out, song_t *song, NSUB_FORMAT fmt, int apply_offset) {
 	// other metas: none
 
 	// lyrics
+	array_loop(song->lyrics, lyric, lyric_t)
 	{
-		size_t count = array_count(song->lyrics);
-		lyric_t *lyric;
-		for (size_t i = 0; i < count; i++) {
-			lyric = (lyric_t*) array_get(song->lyrics, i);
-			nsub_write_srt_lyric(out, lyric, offset);
-		}
+		nsub_write_srt_lyric(out, lyric, offset);
 	}
 
 	return 1;
diff --git a/src/nsub/nsub_write_webvtt.c b/src/nsub/nsub_write_webvtt.c
index 177d6ae..b73090b 100644
--- a/src/nsub/nsub_write_webvtt.c
+++ b/src/nsub/nsub_write_webvtt.c
@@ -41,13 +41,9 @@ int nsub_write_webvtt(FILE *out, song_t *song, NSUB_FORMAT fmt,
 	}
 
 	// metas
+	array_loop(song->metas, meta, meta_t)
 	{
-		size_t count = array_count(song->metas);
-		meta_t *meta;
-		for (size_t i = 0; i < count; i++) {
-			meta = (meta_t *) array_get(song->metas, i);
-			fprintf(out, "NOTE META %s: %s\n\n", meta->key, meta->value);
-		}
+		fprintf(out, "NOTE META %s: %s\n\n", meta->key, meta->value);
 	}
 
 	// offset is not supported in WebVTT (so, always applied)
@@ -62,13 +58,9 @@ int nsub_write_webvtt(FILE *out, song_t *song, NSUB_FORMAT fmt,
 	}
 
 	// lyrics
+	array_loop(song->lyrics, lyric, lyric_t)
 	{
-		size_t count = array_count(song->lyrics);
-		lyric_t *lyric;
-		for (size_t i = 0; i < count; i++) {
-			lyric = (lyric_t*) array_get(song->lyrics, i);
-			nsub_write_webvtt_lyric(out, lyric, offset);
-		}
+		nsub_write_webvtt_lyric(out, lyric, offset);
 	}
 
 	return 1;
-- 
2.27.0