From: Niki Roo Date: Sun, 6 Mar 2022 20:58:57 +0000 (+0100) Subject: fix utils, add tests X-Git-Url: http://git.nikiroo.be/?a=commitdiff_plain;h=0e6f440ce7bd3090c86b07a26ff8a780c3b7779b;p=nsub.git fix utils, add tests --- diff --git a/src/tests/bin/utils b/src/tests/bin/utils index 0e2bec6..b9bbc5b 100755 Binary files a/src/tests/bin/utils and b/src/tests/bin/utils differ diff --git a/src/tests/launcher.o b/src/tests/launcher.o deleted file mode 100644 index ea0fcf6..0000000 Binary files a/src/tests/launcher.o and /dev/null differ diff --git a/src/tests/utils/array.o b/src/tests/utils/array.o deleted file mode 100644 index 56480d7..0000000 Binary files a/src/tests/utils/array.o and /dev/null differ diff --git a/src/tests/utils/cstring.c b/src/tests/utils/cstring.c index daa612c..b44aef2 100644 --- a/src/tests/utils/cstring.c +++ b/src/tests/utils/cstring.c @@ -17,10 +17,12 @@ * along with this program. If not, see . */ +#include + #include "launcher.h" #include "utils/cstring.h" -#define TEST_FILE_READLN "utils/test_readln.txt" +#define TEST_FILE_READLINE "utils/test_readline.txt" cstring *s; @@ -77,8 +79,8 @@ START(add_all_but_p) ASSERT_EQUALS_STR("Multi-line", str, s->string); reset(); - str = - "Les accents en français sont bien là et se retrouvent avec une fréquence élevée"; + str = "Les accents en français sont bien là et se " + "retrouvent avec une fréquence élevée"; cstring_add(s, str); ASSERT_EQUALS_STR("accents", str, s->string); reset(); @@ -439,12 +441,12 @@ START(clone) if (clone) FAIL("Cloning NULL must return NULL"); - clone = cstring_clone(s); + clone = cstring_clone(""); ASSERT_EQUALS_STR("Cannot clone the empty string", "", clone->string); free_cstring(clone); cstring_add(s, "Testy viva la vida"); - clone = cstring_clone(s); + clone = cstring_clone(s->string); ASSERT_EQUALS_STR("Failed to clone the string", s->string, clone->string); free_cstring(clone); @@ -531,6 +533,44 @@ START(trim) END +START(remove_crlf) + char *str = "testy"; + + cstring_add(s, str); + s->length = cstring_remove_crlf(s->string); + ASSERT_EQUALS_STR("no-op failed", str, s->string); + + reset(); + cstring_add(s, str); + cstring_add(s, "\n"); + s->length = cstring_remove_crlf(s->string); + ASSERT_EQUALS_STR("\\n failed", str, s->string); + + reset(); + cstring_add(s, str); + cstring_add(s, "\r\n"); + s->length = cstring_remove_crlf(s->string); + ASSERT_EQUALS_STR("\\r\\n failed", str, s->string); + + reset(); + cstring_add(s, str); + cstring_add(s, "\n\n"); + s->length = cstring_remove_crlf(s->string); + ASSERT_EQUALS_STR("\\n\\n failed", "testy\n", s->string); + + reset(); + cstring_add(s, str); + cstring_add(s, "\r\n\r\n"); + s->length = cstring_remove_crlf(s->string); + ASSERT_EQUALS_STR("\\r\\n\\r\\n failed", "testy\r\n", s->string); + + reset(); + cstring_add(s, "\n"); + s->length = cstring_remove_crlf(s->string); + ASSERT_EQUALS_STR("\\n uniq failed", "", s->string); + + END + START(toupper) cstring_add(s, ""); cstring_toupper(s); @@ -589,9 +629,9 @@ START(tolower) END -START(readln) +START(readline) int read; - FILE *testin = fopen(TEST_FILE_READLN, "r"); + FILE *testin = fopen(TEST_FILE_READLINE, "r"); if (!testin) FAIL("Test file not found: test_readln.txt"); @@ -619,6 +659,121 @@ START(readln) END +START(add_path) + cstring_add_path(s, "root"); + ASSERT_EQUALS_STR("failed to create root path", "/root", s->string); + + cstring_add_path(s, "dir"); + ASSERT_EQUALS_STR("failed to add a dir", "/root/dir", s->string); + + cstring_add_path(s, "sub/"); + ASSERT_EQUALS_STR("extra / failed", "/root/dir/sub", s->string); + + END + +START(pop_path) + cstring_add(s, ""); + ASSERT_EQUALS_INT("empty test failed", 0, cstring_pop_path(s, 1)); + + reset(); + cstring_add(s, "root"); + ASSERT_EQUALS_INT("0 nbr test failed", 0, cstring_pop_path(s, 0)); + ASSERT_EQUALS_STR("0 test failed", "root", s->string); + + reset(); + cstring_add(s, "root/"); + ASSERT_EQUALS_INT("0² nbr test failed", 0, cstring_pop_path(s, 0)); + ASSERT_EQUALS_STR("0² test failed", "root", s->string); + + reset(); + cstring_add(s, "/"); + ASSERT_EQUALS_INT("root test nbr failed", 0, cstring_pop_path(s, 1)); + ASSERT_EQUALS_STR("root test failed", "/", s->string); + + reset(); + cstring_add(s, "/"); + ASSERT_EQUALS_INT("root² test nbr failed", 0, cstring_pop_path(s, 2)); + ASSERT_EQUALS_STR("root² test failed", "/", s->string); + + reset(); + cstring_add(s, "/root"); + ASSERT_EQUALS_INT("/root test nbr failed", 1, cstring_pop_path(s, 1)); + ASSERT_EQUALS_STR("/root test failed", "/", s->string); + + reset(); + cstring_add(s, "/root"); + ASSERT_EQUALS_INT("/root³ test nbr failed", 1, cstring_pop_path(s, 2)); + ASSERT_EQUALS_STR("/root³ test failed", "/", s->string); + + reset(); + cstring_add(s, "/root/dir/file"); + ASSERT_EQUALS_INT("2 test nbr failed", 2, cstring_pop_path(s, 2)); + ASSERT_EQUALS_STR("2 test failed", "/root", s->string); + + reset(); + cstring_add(s, "/root/dir/file/"); + ASSERT_EQUALS_INT("trailing / test nbr failed", 1, + cstring_pop_path(s, 1)); + ASSERT_EQUALS_STR("trailing / test failed", "/root/dir", s->string); + + END + +START(basename) + char *str; + + cstring_add(s, ""); + str = cstring_basename(s->string, NULL); + ASSERT_EQUALS_STR("empty test", "", str); + free(str); + + reset(); + cstring_add(s, "/root/path/dir/file"); + str = cstring_basename(s->string, NULL); + ASSERT_EQUALS_STR("simple test", "file", str); + free(str); + + reset(); + cstring_add(s, "/root/path/dir/file"); + str = cstring_basename(s->string, ".ext"); + ASSERT_EQUALS_STR("no ext test", "file", str); + free(str); + + reset(); + cstring_add(s, "/root/path/dir/file.test"); + str = cstring_basename(s->string, ".ext"); + ASSERT_EQUALS_STR("wrong ext test", "file.test", str); + free(str); + + reset(); + cstring_add(s, "/root/path/dir/file.ext"); + str = cstring_basename(s->string, ".ext"); + ASSERT_EQUALS_STR("good ext test", "file", str); + free(str); + + END + +START(dirname) + char *str; + + cstring_add(s, "/root/path"); + str = cstring_dirname(s->string); + ASSERT_EQUALS_STR("simple test", "/root", str); + free(str); + + reset(); + cstring_add(s, "/root/path/"); + str = cstring_dirname(s->string); + ASSERT_EQUALS_STR("trailing / test", "/root", str); + free(str); + + reset(); + cstring_add(s, "/"); + str = cstring_dirname(s->string); + ASSERT_EQUALS_STR("root is root of root test", "/", str); + free(str); + + END + START(many_adds) size_t count = 50000000; for (size_t i = 0; i < count; i++) { @@ -652,10 +807,14 @@ Suite *test_cstring(const char title[]) { tcase_add_test(core, clone); tcase_add_test(core, rtrim); tcase_add_test(core, trim); + tcase_add_test(core, remove_crlf); tcase_add_test(core, toupper); tcase_add_test(core, tolower); - - tcase_add_test(core, readln); + tcase_add_test(core, readline); + tcase_add_test(core, add_path); + tcase_add_test(core, pop_path); + tcase_add_test(core, basename); + tcase_add_test(core, dirname); suite_add_tcase(suite, core); diff --git a/src/tests/utils/cstring.o b/src/tests/utils/cstring.o deleted file mode 100644 index 6da56ba..0000000 Binary files a/src/tests/utils/cstring.o and /dev/null differ diff --git a/src/tests/utils/main.o b/src/tests/utils/main.o deleted file mode 100644 index b632e13..0000000 Binary files a/src/tests/utils/main.o and /dev/null differ diff --git a/src/tests/utils/test_readln.txt b/src/tests/utils/test_readline.txt similarity index 100% rename from src/tests/utils/test_readln.txt rename to src/tests/utils/test_readline.txt diff --git a/src/utils/cstring.c b/src/utils/cstring.c index d501537..7c06927 100644 --- a/src/utils/cstring.c +++ b/src/utils/cstring.c @@ -1,7 +1,7 @@ /* * CUtils: some small C utilities * - * Copyright (C) 2012 Niki Roo + * Copyright (C) 2011 Niki Roo * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -19,10 +19,10 @@ /* Name: cstring.c - Copyright: niki (cc-by-nc) 2011 + Copyright: niki (gpl3 or later) 2011 Author: niki Date: 2011-06-16 - Description: cstring is a collection of helper functions to manipulate string of text + Description: cstring is a collection of helper functions to manipulate text */ #include "cstring.h" @@ -33,6 +33,9 @@ #include #include +// For upper/lowercase +#include + #ifndef BUFFER_SIZE #define BUFFER_SIZE 81 #endif @@ -43,7 +46,7 @@ #define CSTRING_SEP '/' #endif -//start of private prototypes +// Private functions typedef struct { size_t buffer_length; @@ -53,8 +56,14 @@ typedef struct { static void cstring_swap(cstring *a, cstring *b); /** Change the case to upper -or- lower case (UTF8-compatible) */ static void cstring_change_case(cstring *self, int up); +/** For path-related functions */ +static void normalize_path(cstring *self); + +// Private variables -//end of private prototypes +static char *locale = NULL; + +// end of privates cstring *new_cstring() { cstring *string; @@ -334,16 +343,15 @@ int cstring_replace(cstring *self, const char from[], const char to[]) { cstring *buffer; size_t i; size_t step; - char *swap; int occur; - // easy optimization: + // easy optimisation: if (!from || !from[0]) return 0; if (from && to && from[0] && to[0] && !from[1] && !to[1]) return cstring_replace_car(self->string, from[0], to[0]); - // optimize for same-size strings? + // optimise for same-size strings? step = strlen(from) - 1; buffer = new_cstring(); @@ -358,12 +366,7 @@ int cstring_replace(cstring *self, const char from[], const char to[]) { } } - // not clean, but quicker: - swap = self->string; - self->string = buffer->string; - buffer->string = swap; - self->length = buffer->length; - + cstring_swap(self, buffer); free_cstring(buffer); return occur; } @@ -459,12 +462,12 @@ char *cstring_convert(cstring *self) { return string; } -cstring *cstring_clone(cstring *self) { +cstring *cstring_clone(const char self[]) { if (self == NULL) return NULL; cstring *clone = new_cstring(); - cstring_add(clone, self->string); + cstring_add(clone, self); return clone; } @@ -497,27 +500,16 @@ void cstring_trim(cstring *self, char car) { } } -size_t cstring_remove_crlf(cstring *self) { - size_t removed; - - removed = cstring_sremove_crlf(self->string, self->length); - self->length -= removed; - - return removed; -} - -size_t cstring_sremove_crlf(char data[], size_t n) { - size_t removed; - - removed = n; - while (removed > 0 - && (data[removed - 1] == '\r' || data[removed - 1] == '\n')) { - removed--; - } +size_t cstring_remove_crlf(char *self) { + size_t sz = strlen(self); + if (sz && self[sz - 1] == '\n') + sz--; + if (sz && self[sz - 1] == '\r') + sz--; - data[removed] = '\0'; + self[sz] = '\0'; - return removed; + return sz; } void cstring_toupper(cstring *self) { @@ -529,30 +521,75 @@ void cstring_tolower(cstring *self) { } void cstring_change_case(cstring *self, int up) { - wchar_t *wide; + // Change LC_ALL to LANG if not found + // TODO: only take part we need (also, this is still bad practise) + if (!locale) { + locale = setlocale(LC_ALL, NULL); + if (!locale || !locale[0] || !strcmp("C", locale)) { + char *lang = getenv("LANG"); + if (lang && lang[0]) { + locale = setlocale(LC_ALL, lang); + if (!locale) + locale = ""; + } + } + } + + cstring *rep; + mbstate_t state_from, state_to; + wchar_t wide; char tmp[10]; - const char *src = self->string; - size_t s, i; - mbstate_t state; - - // init the state (passing NULL is not thread-safe) - memset(&state, '\0', sizeof(mbstate_t)); - - // won't contain MORE chars (but maybe less) - wide = (wchar_t *) malloc((self->length + 1) * sizeof(wchar_t)); - s = mbsrtowcs(wide, &src, self->length, &state); - wide[s] = (wchar_t) '\0'; - cstring_clear(self); - for (i = 0; i <= s; i++) { + size_t count; + + // init the state (NULL = internal hidden state, not thread-safe) + memset(&state_from, '\0', sizeof(mbstate_t)); + memset(&state_to, '\0', sizeof(mbstate_t)); + + rep = new_cstring(); + + size_t i = 0; + while (i < self->length) { + count = mbrtowc(&wide, self->string + i, self->length - i, &state_from); + + //incomplete (should not happen) + if (count == (size_t) -2) { + // return; + cstring_add_car(rep, '_'); + i++; + continue; + } + // invalid multibyte sequence + if (count == (size_t) -1) { + // return; + cstring_add_car(rep, '_'); + i++; + continue; + } + + // End of String (should not happen, see WHILE condition) + if (!count) + break; + + // char is ok + i += count; + if (up) - wide[i] = (wchar_t) towupper((wint_t) wide[i]); + wide = (wchar_t) towupper((wint_t) wide); else - wide[i] = (wchar_t) towlower((wint_t) wide[i]); - memset(&state, '\0', sizeof(mbstate_t)); - wcrtomb(tmp, wide[i], &state); - cstring_add(self, tmp); + wide = (wchar_t) towlower((wint_t) wide); + + count = wcrtomb(tmp, wide, &state_to); + if (count == (size_t) -1) { + // failed to convert :( + cstring_add_car(rep, '_'); + } else { + tmp[count] = '\0'; + cstring_add(rep, tmp); + } } - free(wide); + + cstring_swap(self, rep); + free_cstring(rep); } int cstring_readline(cstring *self, FILE *file) { @@ -570,26 +607,25 @@ int cstring_readline(cstring *self, FILE *file) { cstring_clear(self); buffer[0] = '\0'; - // Note: strlen() could return 0 if the file contains \0 - // at the start of a line + // Note: fgets() could return NULL if EOF is reached if (!fgets(buffer, (int) BUFFER_SIZE - 1, file)) return 0; - size = strlen(buffer); + size = strlen(buffer); full_line = ((file && feof(file)) || size == 0 || buffer[size - 1] == '\n'); - size -= cstring_sremove_crlf(buffer, size); + size = cstring_remove_crlf(buffer); cstring_add(self, buffer); // No luck, we need to continue getting data while (!full_line) { if (!fgets(buffer, (int) BUFFER_SIZE - 1, file)) break; - size = strlen(buffer); + size = strlen(buffer); full_line = ((file && feof(file)) || size == 0 || buffer[size - 1] == '\n'); - size -= cstring_sremove_crlf(buffer, size); + size = cstring_remove_crlf(buffer); cstring_add(self, buffer); } @@ -599,78 +635,81 @@ int cstring_readline(cstring *self, FILE *file) { return 0; } +static void normalize_path(cstring *self) { + while (self->length && self->string[self->length - 1] == CSTRING_SEP) + self->length--; + self->string[self->length] = '\0'; +} + void cstring_add_path(cstring *self, const char subpath[]) { + while (self->length && self->string[self->length - 1] == CSTRING_SEP) + self->length--; cstring_add_car(self, CSTRING_SEP); - cstring_add(self, subpath); + if (subpath && subpath[0]) { + cstring_add(self, subpath); + } + + normalize_path(self); } int cstring_pop_path(cstring *self, int how_many) { - char sep[] = { CSTRING_SEP }; int count = 0; + size_t tmp; + char first = '\0'; - cstring_rtrim(self, CSTRING_SEP); - for (int i = 0; i < how_many; i++) { - size_t idx = cstring_rfind(self->string, sep, 0); - if (!idx) - break; + if (self->length) + first = self->string[0]; - cstring_cut_at(self, idx - 1); - count++; + normalize_path(self); + for (int i = 0; i < how_many; i++) { + tmp = self->length; + while (self->length && self->string[self->length - 1] != CSTRING_SEP) + self->length--; + while (self->length && self->string[self->length - 1] == CSTRING_SEP) + self->length--; + if (self->length != tmp) + count++; } + normalize_path(self); + + // Root is root of root + if (first == CSTRING_SEP && !self->length) + cstring_add_car(self, CSTRING_SEP); return count; } -cstring *cstring_getdir(const char path[]) { - cstring *result; +char *cstring_basename(const char path[], const char ext[]) { size_t i; - size_t sz = strlen(path); - i = sz - 1; - if (i >= 0 && path[i] == CSTRING_SEP) + i = sz; + while (i && path[i] != CSTRING_SEP) i--; - for (; i >= 0 && path[i] != CSTRING_SEP; i--) - ; - - if (i < 0) - return new_cstring(); - - result = new_cstring(); - cstring_addn(result, path, i); - - return result; -} - -cstring *cstring_getfile(cstring *path) { - cstring *result; - ssize_t i; - i = (ssize_t) path->length - 1; - if (i >= 0 && path->string[i] == CSTRING_SEP) - i--; - for (; i >= 0 && path->string[i] != CSTRING_SEP; i--) - ; + cstring *rep; + if (path[i] != CSTRING_SEP) { + rep = cstring_clone(path); + } else { + rep = new_cstring(); + cstring_addf(rep, path, i + 1); + } - if (i < 0 || (size_t) (i + 1) >= path->length) - return new_cstring(); + if (ext && ext[0] && cstring_ends_with(rep->string, ext)) { + cstring_cut_at(rep, rep->length - strlen(ext)); + } - result = new_cstring(); - cstring_add(result, path->string + i + 1); - return result; + return cstring_convert(rep); } -cstring *cstring_getfiles(const char path[]) { - cstring *copy = new_cstring(); - cstring_add(copy, path); - - cstring *result = cstring_getfile(copy); - - free_cstring(copy); - - return result; +char *cstring_dirname(const char path[]) { + cstring *rep = cstring_clone(path); + cstring_pop_path(rep, 1); + return cstring_convert(rep); } -int cstring_is_whole(cstring *self) { - return mbstowcs(NULL, self->string, 0) != (size_t) -1; +int cstring_is_utf8(cstring *self) { + size_t rep = mbstowcs(NULL, self->string, 0); + // -2 = invalid, -1 = not whole + return (rep != (size_t) -2) && (rep != (size_t) -1); } diff --git a/src/utils/cstring.h b/src/utils/cstring.h index 7a378f0..ecd34d6 100644 --- a/src/utils/cstring.h +++ b/src/utils/cstring.h @@ -323,7 +323,7 @@ char *cstring_convert(cstring *self); * * @param self the string to clone */ -cstring *cstring_clone(cstring *self); +cstring *cstring_clone(const char self[]); /** * Trim this cstring of all trailing 'car' instances. @@ -346,10 +346,22 @@ void cstring_rtrim(cstring *self, char car); */ void cstring_trim(cstring *self, char car); +/** + * Remove the \r and \n sequence (or one OR the other) at the end of the string. + * + * @param self the string to change + * + * @return the new length of the string + */ +size_t cstring_remove_crlf(char *self); + /** * Change the case to upper-case (UTF-8 compatible, but the string MUST be * whole). * + * @note: if LC_ALL is not set or is set to C and a viable $LANG exists, it will + * set LC_ALL to $LANG + * * @param self the cstring to work on */ void cstring_toupper(cstring *self); @@ -358,6 +370,9 @@ void cstring_toupper(cstring *self); * Change the case to lower-case (UTF-8 compatible, but the string MUST be * whole). * + * @note: if LC_ALL is not set or is set to C and a viable $LANG exists, it will + * set LC_ALL to $LANG + * * @param self the cstring to work on */ void cstring_tolower(cstring *self); @@ -372,16 +387,6 @@ void cstring_tolower(cstring *self); */ int cstring_readline(cstring *self, FILE *file); -/** - * Read a whole line (CR, LN or CR+LN terminated) from the given socket. - * - * @param self the cstring to read into - * @param fd the socket to read from - * - * @return 1 if a line was read, 0 if not - */ -int cstring_readnet(cstring *self, int fd); - /** * Add a path to the given cstring (if it is currently empty, it * will result in a root path). @@ -394,7 +399,16 @@ int cstring_readnet(cstring *self, int fd); */ void cstring_add_path(cstring *self, const char subpath[]); -//TODO: desc +/** + * Remove the how_many components of the path described by this + * cstring. Will ignore extra path separators and always trim it from the final + * result (i.e., some//path/ is identical to some/path). + * + * @note popping "0" path will simply make sure the string does not end in "/" + * + * @param how_many how many path components to remove (for instance, to go from + * /some/path/to/file to /some/path you would need 2) + */ int cstring_pop_path(cstring *self, int how_many); /** @@ -402,10 +416,13 @@ int cstring_pop_path(cstring *self, int how_many); * '/home/user/file.ext' becomes 'file.ext'). * * @param path the path to get the dir of (it can be a dir itself) + * @param ext the extension to remove if any (can be empty or NULL for none) + * + * @note the extension should include the "." if any * * @return a new string representing the parent directory */ -char *cstring_basename(const char path[]); +char *cstring_basename(const char path[], const char ext[]); /** * Return the dirname of this path (for instance, @@ -418,49 +435,12 @@ char *cstring_basename(const char path[]); char *cstring_dirname(const char path[]); /** - * Return the latest path component of this path (usually a FILE). - * - * @param path the path to get the basename of (it can be a dir itself) - * - * @return a new cstring representing the latest path component - */ -cstring *cstring_getfile(cstring *path); - -/** - * Return the latest path component of this path (usually a FILE). - * - * @param path the path to get the basename of (it can be a dir itself) - * - * @return a new string representing the latest path component - */ -cstring *cstring_getfiles(const char path[]); - -/** - * Remove all the \r and \n at the end of the given cstring. - * - * @param self the cstring to change - * - * @return how many removed characters - */ -size_t cstring_remove_crlf(cstring *self); - -/** - * Remove all the \r and \n at the end of the given string. - * - * @param self the string to change - * @param n the size of the string - * - * @return how many removed characters - */ -size_t cstring_sremove_crlf(char *self, size_t n); - -/** - * Check if the string is whole (i.e., it doesn't contain incomplete UTF-8 - * sequences). + * Check if the string is a correct and whole UTF-8 string (i.e., it is indeed + * an UTF-8 string and doesn't contain incomplete UTF-8 sequences). * - * @return TRUE if it is whole + * @return TRUE if it is UTF-8 */ -int cstring_is_whole(cstring *self); +int cstring_is_utf8(cstring *self); #endif