* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
+#include <stdlib.h>
+
#include "launcher.h"
#include "utils/cstring.h"
-#define TEST_FILE_READLN "utils/test_readln.txt"
+#define TEST_FILE_READLINE "utils/test_readline.txt"
cstring *s;
ASSERT_EQUALS_STR("Multi-line", str, s->string);
reset();
- str =
- "Les accents en français sont bien là et se retrouvent avec une fréquence élevée";
+ str = "Les accents en français sont bien là et se "
+ "retrouvent avec une fréquence élevée";
cstring_add(s, str);
ASSERT_EQUALS_STR("accents", str, s->string);
reset();
if (clone)
FAIL("Cloning NULL must return NULL");
- clone = cstring_clone(s);
+ clone = cstring_clone("");
ASSERT_EQUALS_STR("Cannot clone the empty string", "", clone->string);
free_cstring(clone);
cstring_add(s, "Testy viva la vida");
- clone = cstring_clone(s);
+ clone = cstring_clone(s->string);
ASSERT_EQUALS_STR("Failed to clone the string", s->string,
clone->string);
free_cstring(clone);
END
+// cstring_remove_crlf(): at most one trailing line ending is removed
+START(remove_crlf)
+ char *str = "testy";
+
+ // Nothing to remove. The function works on the raw char buffer and
+ // returns the new length, so that length is stored back in the cstring
+ cstring_add(s, str);
+ s->length = cstring_remove_crlf(s->string);
+ ASSERT_EQUALS_STR("no-op failed", str, s->string);
+
+ // A single "\n" is removed
+ reset();
+ cstring_add(s, str);
+ cstring_add(s, "\n");
+ s->length = cstring_remove_crlf(s->string);
+ ASSERT_EQUALS_STR("\\n failed", str, s->string);
+
+ // A "\r\n" pair is removed as a whole
+ reset();
+ cstring_add(s, str);
+ cstring_add(s, "\r\n");
+ s->length = cstring_remove_crlf(s->string);
+ ASSERT_EQUALS_STR("\\r\\n failed", str, s->string);
+
+ // Only the LAST line ending goes away, the previous one is kept
+ reset();
+ cstring_add(s, str);
+ cstring_add(s, "\n\n");
+ s->length = cstring_remove_crlf(s->string);
+ ASSERT_EQUALS_STR("\\n\\n failed", "testy\n", s->string);
+
+ reset();
+ cstring_add(s, str);
+ cstring_add(s, "\r\n\r\n");
+ s->length = cstring_remove_crlf(s->string);
+ ASSERT_EQUALS_STR("\\r\\n\\r\\n failed", "testy\r\n", s->string);
+
+ // A string made of a lone line ending becomes empty
+ reset();
+ cstring_add(s, "\n");
+ s->length = cstring_remove_crlf(s->string);
+ ASSERT_EQUALS_STR("\\n uniq failed", "", s->string);
+
+ END
+
START(toupper)
cstring_add(s, "");
cstring_toupper(s);
END
-START(readln)
+START(readline)
int read;
- FILE *testin = fopen(TEST_FILE_READLN, "r");
+ FILE *testin = fopen(TEST_FILE_READLINE, "r");
if (!testin)
FAIL("Test file not found: test_readln.txt");
END
+// cstring_add_path(): components are joined with a single separator
+START(add_path)
+ // "/root" is expected: s is presumably empty when the test starts
+ cstring_add_path(s, "root");
+ ASSERT_EQUALS_STR("failed to create root path", "/root", s->string);
+
+ cstring_add_path(s, "dir");
+ ASSERT_EQUALS_STR("failed to add a dir", "/root/dir", s->string);
+
+ // A trailing separator in the sub-path must not be kept
+ cstring_add_path(s, "sub/");
+ ASSERT_EQUALS_STR("extra / failed", "/root/dir/sub", s->string);
+
+ END
+
+// cstring_pop_path(): checks both the returned count of removed components
+// and the resulting path
+START(pop_path)
+ // Nothing can be removed from an empty path
+ cstring_add(s, "");
+ ASSERT_EQUALS_INT("empty test failed", 0, cstring_pop_path(s, 1));
+
+ // Popping 0 components removes nothing...
+ reset();
+ cstring_add(s, "root");
+ ASSERT_EQUALS_INT("0 nbr test failed", 0, cstring_pop_path(s, 0));
+ ASSERT_EQUALS_STR("0 test failed", "root", s->string);
+
+ // ...but still trims a trailing separator
+ reset();
+ cstring_add(s, "root/");
+ ASSERT_EQUALS_INT("0² nbr test failed", 0, cstring_pop_path(s, 0));
+ ASSERT_EQUALS_STR("0² test failed", "root", s->string);
+
+ // The root cannot be popped, however many times it is asked
+ reset();
+ cstring_add(s, "/");
+ ASSERT_EQUALS_INT("root test nbr failed", 0, cstring_pop_path(s, 1));
+ ASSERT_EQUALS_STR("root test failed", "/", s->string);
+
+ reset();
+ cstring_add(s, "/");
+ ASSERT_EQUALS_INT("root² test nbr failed", 0, cstring_pop_path(s, 2));
+ ASSERT_EQUALS_STR("root² test failed", "/", s->string);
+
+ // Popping the only component of an absolute path leaves the root
+ reset();
+ cstring_add(s, "/root");
+ ASSERT_EQUALS_INT("/root test nbr failed", 1, cstring_pop_path(s, 1));
+ ASSERT_EQUALS_STR("/root test failed", "/", s->string);
+
+ // Asking for more than available: only the real removals are counted
+ reset();
+ cstring_add(s, "/root");
+ ASSERT_EQUALS_INT("/root³ test nbr failed", 1, cstring_pop_path(s, 2));
+ ASSERT_EQUALS_STR("/root³ test failed", "/", s->string);
+
+ reset();
+ cstring_add(s, "/root/dir/file");
+ ASSERT_EQUALS_INT("2 test nbr failed", 2, cstring_pop_path(s, 2));
+ ASSERT_EQUALS_STR("2 test failed", "/root", s->string);
+
+ // A trailing separator does not count as a component
+ reset();
+ cstring_add(s, "/root/dir/file/");
+ ASSERT_EQUALS_INT("trailing / test nbr failed", 1,
+ cstring_pop_path(s, 1));
+ ASSERT_EQUALS_STR("trailing / test failed", "/root/dir", s->string);
+
+ END
+
+// cstring_basename(): last path component, with an optional extension removed.
+// The result is a plain char * that this test releases with free().
+START(basename)
+ char *str;
+
+ cstring_add(s, "");
+ str = cstring_basename(s->string, NULL);
+ ASSERT_EQUALS_STR("empty test", "", str);
+ free(str);
+
+ // NULL extension: the name is returned untouched
+ reset();
+ cstring_add(s, "/root/path/dir/file");
+ str = cstring_basename(s->string, NULL);
+ ASSERT_EQUALS_STR("simple test", "file", str);
+ free(str);
+
+ // The name has no extension at all: nothing to remove
+ reset();
+ cstring_add(s, "/root/path/dir/file");
+ str = cstring_basename(s->string, ".ext");
+ ASSERT_EQUALS_STR("no ext test", "file", str);
+ free(str);
+
+ // The name has another extension: it must be kept
+ reset();
+ cstring_add(s, "/root/path/dir/file.test");
+ str = cstring_basename(s->string, ".ext");
+ ASSERT_EQUALS_STR("wrong ext test", "file.test", str);
+ free(str);
+
+ // The extension matches: it is removed
+ reset();
+ cstring_add(s, "/root/path/dir/file.ext");
+ str = cstring_basename(s->string, ".ext");
+ ASSERT_EQUALS_STR("good ext test", "file", str);
+ free(str);
+
+ END
+
+// cstring_dirname(): parent directory of a path, returned as a plain char *
+// that this test releases with free().
+START(dirname)
+ char *str;
+
+ cstring_add(s, "/root/path");
+ str = cstring_dirname(s->string);
+ ASSERT_EQUALS_STR("simple test", "/root", str);
+ free(str);
+
+ // A trailing separator must not change the answer
+ reset();
+ cstring_add(s, "/root/path/");
+ str = cstring_dirname(s->string);
+ ASSERT_EQUALS_STR("trailing / test", "/root", str);
+ free(str);
+
+ // The parent of the root is the root itself
+ reset();
+ cstring_add(s, "/");
+ str = cstring_dirname(s->string);
+ ASSERT_EQUALS_STR("root is root of root test", "/", str);
+ free(str);
+
+ END
+
START(many_adds)
size_t count = 50000000;
for (size_t i = 0; i < count; i++) {
tcase_add_test(core, clone);
tcase_add_test(core, rtrim);
tcase_add_test(core, trim);
+ tcase_add_test(core, remove_crlf);
tcase_add_test(core, toupper);
tcase_add_test(core, tolower);
-
- tcase_add_test(core, readln);
+ tcase_add_test(core, readline);
+ tcase_add_test(core, add_path);
+ tcase_add_test(core, pop_path);
+ tcase_add_test(core, basename);
+ tcase_add_test(core, dirname);
suite_add_tcase(suite, core);
/*
* CUtils: some small C utilities
*
- * Copyright (C) 2012 Niki Roo
+ * Copyright (C) 2011 Niki Roo
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
/*
Name: cstring.c
- Copyright: niki (cc-by-nc) 2011
+ Copyright: niki (gpl3 or later) 2011
Author: niki
Date: 2011-06-16
- Description: cstring is a collection of helper functions to manipulate string of text
+ Description: cstring is a collection of helper functions to manipulate text
*/
#include "cstring.h"
#include <wchar.h>
#include <wctype.h>
+// For upper/lowercase
+#include <locale.h>
+
#ifndef BUFFER_SIZE
#define BUFFER_SIZE 81
#endif
#define CSTRING_SEP '/'
#endif
-//start of private prototypes
+// Private functions
typedef struct {
size_t buffer_length;
static void cstring_swap(cstring *a, cstring *b);
/** Change the case to upper -or- lower case (UTF8-compatible) */
static void cstring_change_case(cstring *self, int up);
+/** For path-related functions */
+static void normalize_path(cstring *self);
+
+// Private variables
-//end of private prototypes
+static char *locale = NULL;
+
+// end of privates
cstring *new_cstring() {
cstring *string;
cstring *buffer;
size_t i;
size_t step;
- char *swap;
int occur;
- // easy optimization:
+ // easy optimisation:
if (!from || !from[0])
return 0;
if (from && to && from[0] && to[0] && !from[1] && !to[1])
return cstring_replace_car(self->string, from[0], to[0]);
- // optimize for same-size strings?
+ // optimise for same-size strings?
step = strlen(from) - 1;
buffer = new_cstring();
}
}
- // not clean, but quicker:
- swap = self->string;
- self->string = buffer->string;
- buffer->string = swap;
- self->length = buffer->length;
-
+ cstring_swap(self, buffer);
free_cstring(buffer);
return occur;
}
return string;
}
-cstring *cstring_clone(cstring *self) {
+// Build a new cstring holding a copy of the given C string.
+// A NULL input gives NULL back instead of an empty cstring.
+cstring *cstring_clone(const char self[]) {
if (self == NULL)
return NULL;
cstring *clone = new_cstring();
- cstring_add(clone, self->string);
+ cstring_add(clone, self);
return clone;
}
}
}
-size_t cstring_remove_crlf(cstring *self) {
- size_t removed;
-
- removed = cstring_sremove_crlf(self->string, self->length);
- self->length -= removed;
-
- return removed;
-}
-
-size_t cstring_sremove_crlf(char data[], size_t n) {
- size_t removed;
-
- removed = n;
- while (removed > 0
- && (data[removed - 1] == '\r' || data[removed - 1] == '\n')) {
- removed--;
- }
+/*
+ * Strip at most one trailing line ending ("\n", "\r\n" or a lone "\r") from
+ * the NUL-terminated string, in place, and return the new length.
+ * Earlier line endings are kept. A NULL string is left alone and reported
+ * as length 0.
+ */
+size_t cstring_remove_crlf(char *self) {
+ // strlen(NULL) is undefined behaviour: be as tolerant as cstring_clone()
+ if (!self)
+ return 0;
+
+ size_t sz = strlen(self);
+ // '\n' first, then '\r', so that "\r\n" goes away as a whole
+ if (sz && self[sz - 1] == '\n')
+ sz--;
+ if (sz && self[sz - 1] == '\r')
+ sz--;
- data[removed] = '\0';
+ self[sz] = '\0';
- return removed;
+ return sz;
}
void cstring_toupper(cstring *self) {
}
void cstring_change_case(cstring *self, int up) {
- wchar_t *wide;
+ // Convert the string one multibyte character at a time, change its case
+ // as a wide character, and rebuild the result in a scratch cstring that
+ // is swapped into self at the end. Bytes that cannot be decoded (and wide
+ // characters that cannot be encoded back) are replaced by '_'.
+
+ // Change LC_ALL to LANG if not found
+ // TODO: only take part we need (also, this is still bad practise)
+ if (!locale) {
+ locale = setlocale(LC_ALL, NULL);
+ if (!locale || !locale[0] || !strcmp("C", locale)) {
+ char *lang = getenv("LANG");
+ if (lang && lang[0]) {
+ locale = setlocale(LC_ALL, lang);
+ if (!locale)
+ locale = "";
+ }
+ }
+ }
+
+ cstring *rep;
+ mbstate_t state_from, state_to;
+ wchar_t wide;
char tmp[10];
- const char *src = self->string;
- size_t s, i;
- mbstate_t state;
-
- // init the state (passing NULL is not thread-safe)
- memset(&state, '\0', sizeof(mbstate_t));
-
- // won't contain MORE chars (but maybe less)
- wide = (wchar_t *) malloc((self->length + 1) * sizeof(wchar_t));
- s = mbsrtowcs(wide, &src, self->length, &state);
- wide[s] = (wchar_t) '\0';
- cstring_clear(self);
- for (i = 0; i <= s; i++) {
+ size_t count;
+
+ // init the state (NULL = internal hidden state, not thread-safe)
+ memset(&state_from, '\0', sizeof(mbstate_t));
+ memset(&state_to, '\0', sizeof(mbstate_t));
+
+ rep = new_cstring();
+
+ size_t i = 0;
+ while (i < self->length) {
+ count = mbrtowc(&wide, self->string + i, self->length - i, &state_from);
+
+ // incomplete (-2, should not happen) or invalid (-1) sequence
+ if (count == (size_t) -2 || count == (size_t) -1) {
+ // After -1 the state is unspecified and after -2 it has absorbed
+ // the pending bytes: start afresh before retrying on the next byte
+ memset(&state_from, '\0', sizeof(mbstate_t));
+ cstring_add_car(rep, '_');
+ i++;
+ continue;
+ }
+
+ // End of String (should not happen, see WHILE condition)
+ if (!count)
+ break;
+
+ // char is ok
+ i += count;
+
if (up)
- wide[i] = (wchar_t) towupper((wint_t) wide[i]);
+ wide = (wchar_t) towupper((wint_t) wide);
else
- wide[i] = (wchar_t) towlower((wint_t) wide[i]);
- memset(&state, '\0', sizeof(mbstate_t));
- wcrtomb(tmp, wide[i], &state);
- cstring_add(self, tmp);
+ wide = (wchar_t) towlower((wint_t) wide);
+
+ count = wcrtomb(tmp, wide, &state_to);
+ if (count == (size_t) -1) {
+ // failed to convert :( -- the state is unspecified now, reset it
+ memset(&state_to, '\0', sizeof(mbstate_t));
+ cstring_add_car(rep, '_');
+ } else {
+ tmp[count] = '\0';
+ cstring_add(rep, tmp);
+ }
}
- free(wide);
+
+ cstring_swap(self, rep);
+ free_cstring(rep);
}
int cstring_readline(cstring *self, FILE *file) {
cstring_clear(self);
buffer[0] = '\0';
- // Note: strlen() could return 0 if the file contains \0
- // at the start of a line
+ // Note: fgets() could return NULL if EOF is reached
if (!fgets(buffer, (int) BUFFER_SIZE - 1, file))
return 0;
- size = strlen(buffer);
+ size = strlen(buffer);
full_line = ((file && feof(file)) || size == 0
|| buffer[size - 1] == '\n');
- size -= cstring_sremove_crlf(buffer, size);
+ size = cstring_remove_crlf(buffer);
cstring_add(self, buffer);
// No luck, we need to continue getting data
while (!full_line) {
if (!fgets(buffer, (int) BUFFER_SIZE - 1, file))
break;
- size = strlen(buffer);
+ size = strlen(buffer);
full_line = ((file && feof(file)) || size == 0
|| buffer[size - 1] == '\n');
- size -= cstring_sremove_crlf(buffer, size);
+ size = cstring_remove_crlf(buffer);
cstring_add(self, buffer);
}
return 0;
}
+// Drop every trailing path separator and re-terminate the string there
+static void normalize_path(cstring *self) {
+ size_t len = self->length;
+
+ while (len > 0 && self->string[len - 1] == CSTRING_SEP)
+ len--;
+
+ self->length = len;
+ self->string[len] = '\0';
+}
+
void cstring_add_path(cstring *self, const char subpath[]) {
+ // Join on exactly one separator, whatever the current path ends with
+ normalize_path(self);
cstring_add_car(self, CSTRING_SEP);
- cstring_add(self, subpath);
+ if (subpath && subpath[0]) {
+ cstring_add(self, subpath);
+ }
+
+ normalize_path(self);
+
+ // A separator was always added above, so a result stripped down to
+ // nothing can only be the root itself: keep it instead of returning ""
+ // (same "root is root of root" rule as in cstring_pop_path)
+ if (!self->length)
+ cstring_add_car(self, CSTRING_SEP);
}
int cstring_pop_path(cstring *self, int how_many) {
- char sep[] = { CSTRING_SEP };
int count = 0;
+ size_t tmp;
+ char first = '\0';
- cstring_rtrim(self, CSTRING_SEP);
- for (int i = 0; i < how_many; i++) {
- size_t idx = cstring_rfind(self->string, sep, 0);
- if (!idx)
- break;
+ // Remember the first char before anything is stripped, to know at the
+ // end whether the path started with a separator
+ if (self->length)
+ first = self->string[0];
- cstring_cut_at(self, idx - 1);
- count++;
+ normalize_path(self);
+ for (int i = 0; i < how_many; i++) {
+ tmp = self->length;
+ // Drop the last component, then the separator(s) in front of it; only
+ // the length moves here, the string is re-terminated after the loop
+ while (self->length && self->string[self->length - 1] != CSTRING_SEP)
+ self->length--;
+ while (self->length && self->string[self->length - 1] == CSTRING_SEP)
+ self->length--;
+ // Only count the iterations that actually removed something
+ if (self->length != tmp)
+ count++;
}
+ normalize_path(self);
+
+ // Root is root of root
+ if (first == CSTRING_SEP && !self->length)
+ cstring_add_car(self, CSTRING_SEP);
return count;
}
-cstring *cstring_getdir(const char path[]) {
- cstring *result;
+// Return what follows the last separator of the path (the whole path if it
+// has none), without the given extension when the name ends with it.
+// NOTE(review): path is passed to strlen() unchecked, so it must not be NULL.
+char *cstring_basename(const char path[], const char ext[]) {
size_t i;
-
size_t sz = strlen(path);
- i = sz - 1;
- if (i >= 0 && path[i] == CSTRING_SEP)
+ // Walk back from the terminator to the last separator (or to index 0)
+ i = sz;
+ while (i && path[i] != CSTRING_SEP)
i--;
- for (; i >= 0 && path[i] != CSTRING_SEP; i--)
- ;
-
- if (i < 0)
- return new_cstring();
-
- result = new_cstring();
- cstring_addn(result, path, i);
-
- return result;
-}
-
-cstring *cstring_getfile(cstring *path) {
- cstring *result;
- ssize_t i;
- i = (ssize_t) path->length - 1;
- if (i >= 0 && path->string[i] == CSTRING_SEP)
- i--;
- for (; i >= 0 && path->string[i] != CSTRING_SEP; i--)
- ;
+ cstring *rep;
+ // No separator found: the whole path is the name. Otherwise keep what
+ // comes after it (cstring_addf() presumably appends path starting at
+ // the given index -- TODO confirm against cstring.h)
+ if (path[i] != CSTRING_SEP) {
+ rep = cstring_clone(path);
+ } else {
+ rep = new_cstring();
+ cstring_addf(rep, path, i + 1);
+ }
- if (i < 0 || (size_t) (i + 1) >= path->length)
- return new_cstring();
+ // Cut the extension only when the name really ends with it
+ if (ext && ext[0] && cstring_ends_with(rep->string, ext)) {
+ cstring_cut_at(rep, rep->length - strlen(ext));
+ }
- result = new_cstring();
- cstring_add(result, path->string + i + 1);
- return result;
+ // cstring_convert() presumably hands over the raw char buffer and
+ // disposes of the cstring wrapper -- TODO confirm
+ return cstring_convert(rep);
}
-cstring *cstring_getfiles(const char path[]) {
- cstring *copy = new_cstring();
- cstring_add(copy, path);
-
- cstring *result = cstring_getfile(copy);
-
- free_cstring(copy);
-
- return result;
+// Return the path with its last component removed, as a new string.
+// A NULL path gives NULL back.
+char *cstring_dirname(const char path[]) {
+ cstring *rep = cstring_clone(path);
+ // cstring_clone() returns NULL for a NULL path: cstring_pop_path() would
+ // dereference it, so stop here
+ if (!rep)
+ return NULL;
+
+ cstring_pop_path(rep, 1);
+ return cstring_convert(rep);
}
-int cstring_is_whole(cstring *self) {
- return mbstowcs(NULL, self->string, 0) != (size_t) -1;
+// Tell whether the whole string can be decoded as multibyte text.
+// NOTE(review): mbstowcs() decodes with the encoding of the current locale,
+// so this only checks UTF-8 when the locale is a UTF-8 one.
+int cstring_is_utf8(cstring *self) {
+ // With a NULL destination, mbstowcs() only counts the wide characters.
+ // Its single failure code is (size_t) -1, for invalid and for incomplete
+ // sequences alike; (size_t) -2 is an mbrtowc() code it never returns
+ return mbstowcs(NULL, self->string, 0) != (size_t) -1;
}
*
* @param self the string to clone
*/
-cstring *cstring_clone(cstring *self);
+cstring *cstring_clone(const char self[]);
/**
* Trim this cstring of all trailing 'car' instances.
*/
void cstring_trim(cstring *self, char car);
+/**
+ * Remove one line ending (<tt>\n</tt>, <tt>\r\n</tt> or a lone <tt>\r</tt>)
+ * at the end of the string, if any. Only the last one is removed: earlier
+ * line endings are kept.
+ *
+ * @param self the string to change
+ *
+ * @return the new length of the string
+ */
+size_t cstring_remove_crlf(char *self);
+
/**
* Change the case to upper-case (UTF-8 compatible, but the string MUST be
* whole).
*
+ * @note: if LC_ALL is not set or is set to C and a viable $LANG exists, it will
+ * set LC_ALL to $LANG
+ *
* @param self the cstring to work on
*/
void cstring_toupper(cstring *self);
* Change the case to lower-case (UTF-8 compatible, but the string MUST be
* whole).
*
+ * @note: if LC_ALL is not set or is set to C and a viable $LANG exists, it will
+ * set LC_ALL to $LANG
+ *
* @param self the cstring to work on
*/
void cstring_tolower(cstring *self);
*/
int cstring_readline(cstring *self, FILE *file);
-/**
- * Read a whole line (CR, LN or CR+LN terminated) from the given socket.
- *
- * @param self the cstring to read into
- * @param fd the socket to read from
- *
- * @return 1 if a line was read, 0 if not
- */
-int cstring_readnet(cstring *self, int fd);
-
/**
* Add a path to the given cstring (if it is currently empty, it
* will result in a root path).
*/
void cstring_add_path(cstring *self, const char subpath[]);
-//TODO: desc
+/**
+ * Remove the <tt>how_many</tt> last components of the path described by this
+ * cstring. Will ignore extra path separators and always trim them from the
+ * final result (i.e., <tt>some//path/</tt> is identical to
+ * <tt>some/path</tt>).
+ *
+ * @note popping "0" path will simply make sure the string does not end in "/"
+ *
+ * @param self the cstring holding the path to work on
+ * @param how_many how many path components to remove (for instance, to go from
+ * <tt>/some/path/to/file</tt> to <tt>/some/path</tt> you would need 2)
+ *
+ * @return how many components were actually removed (it can be less than
+ * <tt>how_many</tt> if the path does not have that many)
+ */
int cstring_pop_path(cstring *self, int how_many);
/**
* '/home/user/file.ext' becomes 'file.ext').
*
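- * @param path the path to get the dir of (it can be a dir itself)
+ * @param path the path to get the basename of (it can be a dir itself)
+ * @param ext the extension to remove if any (can be empty or NULL for none)
+ *
+ * @note the extension should include the "." if any
*
- * @return a new string representing the parent directory
+ * @return a new string representing the last path component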
*/
-char *cstring_basename(const char path[]);
+char *cstring_basename(const char path[], const char ext[]);
/**
* Return the dirname of this path (for instance,
char *cstring_dirname(const char path[]);
/**
- * Return the latest path component of this path (usually a FILE).
- *
- * @param path the path to get the basename of (it can be a dir itself)
- *
- * @return a new cstring representing the latest path component
- */
-cstring *cstring_getfile(cstring *path);
-
-/**
- * Return the latest path component of this path (usually a FILE).
- *
- * @param path the path to get the basename of (it can be a dir itself)
- *
- * @return a new string representing the latest path component
- */
-cstring *cstring_getfiles(const char path[]);
-
-/**
- * Remove all the \r and \n at the end of the given cstring.
- *
- * @param self the cstring to change
- *
- * @return how many removed characters
- */
-size_t cstring_remove_crlf(cstring *self);
-
-/**
- * Remove all the \r and \n at the end of the given string.
- *
- * @param self the string to change
- * @param n the size of the string
- *
- * @return how many removed characters
- */
-size_t cstring_sremove_crlf(char *self, size_t n);
-
-/**
- * Check if the string is whole (i.e., it doesn't contain incomplete UTF-8
- * sequences).
+ * Check if the string is a correct and whole UTF-8 string (i.e., it is indeed
+ * a UTF-8 string and doesn't contain incomplete UTF-8 sequences).
*
- * @return TRUE if it is whole
+ * @return TRUE if it is UTF-8
*/
-int cstring_is_whole(cstring *self);
+int cstring_is_utf8(cstring *self);
#endif