raw ebcdic support + debug, csv... params
author Niki <david.roulet@solidaris.be>
Mon, 24 Jun 2024 13:58:53 +0000 (15:58 +0200)
committer Niki <david.roulet@solidaris.be>
Mon, 24 Jun 2024 13:58:53 +0000 (15:58 +0200)
src/cbook/cbook.h
src/cbook/cbook_csv.c
src/cbook/cbook_main.c
src/cbook/cbook_pl1.c
src/cutils

index 77ec54c5c6121019069a49d78c6c9cfc0f101820..7aebd7b5ba773568396a37b2e7ae92360ba15b25 100755 (executable)
@@ -56,6 +56,7 @@ typedef int CBOOK_OUT;
 
 typedef struct {
        char *name;
+       int raw_ebcdic;
        int started;
        int finished;
        int unaligned;
@@ -65,6 +66,7 @@ typedef struct {
        char *err_mess;
        char *err_field;
        CBOOK_OUT out_fmt;
+       size_t bytes;
        array_t *lines;
 } book_t;
 
@@ -90,6 +92,7 @@ int read_book(FILE *book_file, book_t *book);
 
 int write_header(FILE *outfile, book_t *book);
 int write_csv(FILE *output, book_t *book, cstring_t *data, size_t lino);
+// Release what the CSV writer keeps between write_csv calls; call it once
+// all the lines have been written. Takes no argument: (void) makes this a
+// real prototype, so a call with arguments is rejected by the compiler.
+void write_done(void);
 
 #endif /* CBOOK_H */
 
index 32e51f2538f72a2b2fa2da9b9f68929b3e1fbd97..135922d5204f1da3db7ed2d4688952d946404c4f 100755 (executable)
 
 // DECLARATIONS
 
+static cstring_t *ascii_line = NULL;
+
 // Actual write code, but may write either data or header
-int write_line(FILE *outfile, book_t *book, cstring_t *data, 
+static int write_line(FILE *outfile, book_t *book, cstring_t *data, 
                int header, size_t lino);
 
 // Write one field (including subfields), consume data and report errors
-char *write_field(FILE *outfile, book_t *book, line_t *field, 
+static char *write_field(FILE *outfile, book_t *book, line_t *field, 
                char *data, size_t *remaining, size_t lino);
 
 // Write this one field (data is ready), report errors in book->err_mess
-void one_field(FILE *outfile, book_t *book, line_t *field, char *data);
+static void one_field(FILE *outfile, book_t *book, line_t *field, char *data);
+
+// Convert EBCDIC to ASCII
+static void ascii(FILE *outfile, char *ebcdic, size_t sz);
 
 // PUBLIC
 
@@ -45,17 +50,21 @@ int write_header(FILE *outfile, book_t *book) {
                return 1;
 
        return write_line(outfile, book, NULL, 1, 0);
-       fwrite("\n", 1, 1, outfile);
 }
 
 int write_csv(FILE *outfile, book_t *book, cstring_t *data, size_t lino) {
        return write_line(outfile, book, data, 0, lino);
 }
 
+// Release the shared ascii_line scratch buffer once all writing is done.
+// The buffer is only created lazily by the field writers, so it may still
+// be NULL here (nothing written yet, or write_done already called): in that
+// case there is nothing to do.
+void write_done(void) {
+       // NOTE(review): free_cstring may well accept NULL, but that cannot
+       // be checked from here, so do not rely on it
+       if (ascii_line) {
+               free_cstring(ascii_line);
+               ascii_line = NULL;
+       }
+}
+
 // PRIVATE
 
-int write_line(FILE *outfile, book_t *book, cstring_t *dataline, int header,
-               size_t lino) {
+static int write_line(FILE *outfile, book_t *book, cstring_t *dataline, 
+               int header, size_t lino) {
        size_t remaining = 0;
        char *data = NULL;
 
@@ -102,11 +111,13 @@ int write_line(FILE *outfile, book_t *book, cstring_t *dataline, int header,
        }
 
        fwrite("\n", 1, 1, outfile);
+       if (book->out_fmt == CBOOK_OUT_FIELDS)
+               fwrite("\n", 1, 1, outfile);
 
        return 1;
 }
 
-char *write_field(FILE *outfile, book_t *book, line_t *field, 
+static char *write_field(FILE *outfile, book_t *book, line_t *field, 
                char *data, size_t *remaining, size_t lino) {
        if (field->type == CBOOK_FMT_GROUP) {
                array_loop(field->children, subfield, line_t *) {
@@ -145,7 +156,11 @@ char *write_field(FILE *outfile, book_t *book, line_t *field,
        return data;
 }
 
-void one_field(FILE *outfile, book_t *book, line_t *field, char *data) {
+static void one_field(FILE *outfile, book_t *book, line_t *field, char *data) {
+       if (!ascii_line)
+               ascii_line = new_cstring();
+       cstring_clear(ascii_line);
+
        switch(book->out_fmt) {
        case CBOOK_OUT_CSV  : break;
        case CBOOK_OUT_FIELDS:
@@ -156,14 +171,32 @@ void one_field(FILE *outfile, book_t *book, line_t *field, char *data) {
 
        switch(field->type) {
        case CBOOK_FMT_CHAR:
-               fwrite(data, 1, field->bytes, outfile);
+               fwrite("\"", 1, 1, outfile);
+               if (book->raw_ebcdic) {
+                       ascii(outfile, data, field->bytes);
+                       fwrite(ascii_line->string, 1, 
+                                       ascii_line->length, outfile);
+               } else {
+                       char *d = data;
+                       if (cstring_find(d, "\"", 0) >= 0) {
+                               cstring_t *tmp = new_cstring();
+                               cstring_addfN(tmp, d, 0, field->bytes);
+                               cstring_replace(tmp, "\"", "\"\"");
+                               d = cstring_convert(tmp);
+                       }
+                       fwrite(d, 1, field->bytes, outfile);
+                       if (d != data)
+                               free(d);
+               }
+               fwrite("\"", 1, 1, outfile);
+
                break;
        case CBOOK_FMT_VARCHAR:
                // first 2 bytes -> BIN FIXED(15)
                ;
                // TODO:
                size_t coded = field->bytes - 2;
-
+               
                data = data + 2;
                if (coded > (field->bytes - 2)) {
                        char buf[100];
@@ -173,8 +206,14 @@ void one_field(FILE *outfile, book_t *book, line_t *field, char *data) {
                                buf, NULL
                        );
                }
-
-               fwrite(data, 1, coded, outfile);
+               
+               size_t bkp = field->bytes;
+               field->bytes = coded;
+               field->type = CBOOK_FMT_CHAR;
+               one_field(outfile, book, field, data);
+               field->type = CBOOK_FMT_VARCHAR;
+               field->bytes = bkp;
+       
                break;
        case CBOOK_FMT_DECIMAL: 
        case CBOOK_FMT_UDECIMAL:
@@ -274,8 +313,11 @@ void one_field(FILE *outfile, book_t *book, line_t *field, char *data) {
 
                }
 
-               if (!positive)
+               if (positive)
+                       fwrite(" ", 1, 1, outfile);
+               else
                        fwrite("-", 1, 1, outfile);
+
                fwrite(str->string, 1, str->length, outfile);
 
                free_cstring(str);
@@ -323,3 +365,80 @@ void one_field(FILE *outfile, book_t *book, line_t *field, char *data) {
                break;
        }
 }
+
+// Convert sz bytes of EBCDIC text into ASCII.
+//
+// The result is APPENDED to the shared ascii_line buffer (created here if
+// needed); nothing is written to outfile, which is currently unused. The
+// caller is expected to clear ascii_line beforehand and to write it out
+// afterwards.
+//
+// - digits and unaccented letters are converted range by range
+// - the punctuation is converted through the table below
+// - a double quote is doubled, so the result can be put as is between
+//   double quotes in a CSV field
+// - any other byte is replaced by U+2117 (℗), encoded in UTF-8
+//
+// NOTE(review): the punctuation table looks like code page 500 rather than
+// code page 037 ('[' on 0x4A, '!' on 0x4F, ']' on 0x5A) -- to be confirmed.
+static void ascii(FILE *outfile, char *ebcdic, size_t sz) {
+       /* - Professor: "So the American government went to 
+        * IBM to come up with an encryption standard, 
+        * and they came up with..."
+        * - Student: "EBCDIC!"
+        */
+
+       (void)outfile; // not used (yet), kept for the callers
+
+       if (!ascii_line)
+               ascii_line = new_cstring();
+
+       cstring_grow_to(ascii_line, sz); // should be enough in most cases
+
+       for (size_t i = 0 ; i < sz ; i++) {
+               // unsigned, or the bytes >= 0x80 could compare as negative
+               unsigned char byte = (unsigned char)ebcdic[i];
+               if ((byte >= 0xF0) && (byte <= 0xF9)) { /* 0 - 9 */
+                       cstring_add_car(ascii_line, byte - 0xC0);
+               } else if ((byte >= 0xC1) && (byte <= 0xC9)) { /* A - I */
+                       cstring_add_car(ascii_line, byte - 0x80);
+               } else if ((byte >= 0xD1) && (byte <= 0xD9)) { /* J - R */
+                       cstring_add_car(ascii_line, byte - 0x87);
+               } else if ((byte >= 0xE2) && (byte <= 0xE9)) { /* S - Z */
+                       cstring_add_car(ascii_line, byte - 0x8F);
+               } else if ((byte >= 0x81) && (byte <= 0x89)) { /* a - i */
+                       cstring_add_car(ascii_line, byte - 0x20);
+               } else if ((byte >= 0x91) && (byte <= 0x99)) { /* j - r */
+                       cstring_add_car(ascii_line, byte - 0x27);
+               } else if ((byte >= 0xA2) && (byte <= 0xA9)) { /* s - z */
+                       cstring_add_car(ascii_line, byte - 0x2F);
+               } else {
+                       switch(byte) {
+                       case 0x40: cstring_add_car(ascii_line, ' '); break;
+                       case 0x4F: cstring_add_car(ascii_line, '!'); break;
+                       case 0x7F: // double it (CSV escaping of the quote)
+                                  cstring_add_car(ascii_line, '"'); 
+                                  cstring_add_car(ascii_line, '"'); 
+                       break;
+                       case 0x7B: cstring_add_car(ascii_line, '#'); break;
+                       case 0x5B: cstring_add_car(ascii_line, '$'); break;
+                       case 0x6C: cstring_add_car(ascii_line, '%'); break;
+                       case 0x50: cstring_add_car(ascii_line, '&'); break;
+                       case 0x7D: cstring_add_car(ascii_line, '\''); break;
+                       case 0x4D: cstring_add_car(ascii_line, '('); break;
+                       case 0x5D: cstring_add_car(ascii_line, ')'); break;
+                       case 0x5C: cstring_add_car(ascii_line, '*'); break;
+                       case 0x4E: cstring_add_car(ascii_line, '+'); break;
+                       case 0x6B: cstring_add_car(ascii_line, ','); break;
+                       case 0x60: cstring_add_car(ascii_line, '-'); break;
+                       case 0x4B: cstring_add_car(ascii_line, '.'); break;
+                       case 0x61: cstring_add_car(ascii_line, '/'); break;
+                       case 0x7A: cstring_add_car(ascii_line, ':'); break;
+                       case 0x5E: cstring_add_car(ascii_line, ';'); break;
+                       case 0x4C: cstring_add_car(ascii_line, '<'); break;
+                       case 0x7E: cstring_add_car(ascii_line, '='); break;
+                       case 0x6E: cstring_add_car(ascii_line, '>'); break;
+                       case 0x6F: cstring_add_car(ascii_line, '?'); break;
+                       case 0x7C: cstring_add_car(ascii_line, '@'); break;
+                       case 0x4A: cstring_add_car(ascii_line, '['); break;
+                       case 0xE0: cstring_add_car(ascii_line, '\\'); break;
+                       case 0x5A: cstring_add_car(ascii_line, ']'); break;
+                       case 0x5F: cstring_add_car(ascii_line, '^'); break;
+                       case 0x6D: cstring_add_car(ascii_line, '_'); break;
+                       case 0x79: cstring_add_car(ascii_line, '`'); break;
+                       case 0xC0: cstring_add_car(ascii_line, '{'); break;
+                       case 0x6A: cstring_add_car(ascii_line, '|'); break;
+                       case 0xD0: cstring_add_car(ascii_line, '}'); break;
+                       case 0xA1: cstring_add_car(ascii_line, '~'); break;
+                       // TODO: 0xC2A5 = 0xE0 \ (same as 0x5C !!!)
+                       default:
+                               // Unknown byte -> U+2117, the (P) or ℗ char.
+                               // It must be sent as its UTF-8 sequence:
+                               // the raw bytes 0x21 0x17 would be a '!'
+                               // followed by a control character.
+                               cstring_add_car(ascii_line, (char)0xE2);
+                               cstring_add_car(ascii_line, (char)0x84);
+                               cstring_add_car(ascii_line, (char)0x97);
+                               break;
+                       }
+               }
+       }
+}
index c21edcb1145e0fb11363a1e48bc5e3d3ecf88a74..4d54c253ef8b732cc31fc23e7c269567807ebb4e 100755 (executable)
 #include <errno.h>
 
 #include "cbook.h"
-#include "cutils/cstring.h"
+#include "cutils/cutils.h"
 
 /* Declarations */
 
+int readline(book_t *book, cstring_t *line, FILE *infile, size_t lino);
 void help(char *program);
 
 /* Public */
@@ -35,14 +36,22 @@ int main(int argc, char **argv) {
        char *input = NULL;
        char *output = NULL;
        
-       // TODO: configure that with params
        CBOOK_OUT out_fmt = CBOOK_OUT_CSV;
-       int ignore_errors = 1;
-       int debug = 1;
+       int ignore_errors = 0;
+       int debug = 0;
+       int raw_ebcdic = 0;
 
        for (int i = 1; i < argc; i++) {
                char *arg = argv[i];
-               if (!strcmp("--help", arg) || !strcmp("-h", arg)) {
+               if (!strcmp("--debug", arg) || !strcmp("-d", arg)) {
+                       debug = 1;
+               } else if (!strcmp("--output-flat", arg) || !strcmp("-o", arg)){
+                       out_fmt = CBOOK_OUT_FIELDS;
+               } else if (!strcmp("--raw-ebcdic", arg) || !strcmp("-r", arg)) {
+                       raw_ebcdic = 1;
+               } else if (!strcmp("--ignore-errors", arg)||!strcmp("-i", arg)){
+                       ignore_errors = 1;
+               } else if (!strcmp("--help", arg) || !strcmp("-h", arg)) {
                        help(argv[0]);
                        return 0;
                } else if (!strcmp("--book", arg) || !strcmp("-b", arg)) {
@@ -99,6 +108,7 @@ int main(int argc, char **argv) {
        }
        
        book_t *book = new_book();
+       book->raw_ebcdic = raw_ebcdic;
        book->out_fmt = out_fmt;
        book->ignore_errors = ignore_errors;
        book->debug = debug;
@@ -119,29 +129,67 @@ int main(int argc, char **argv) {
        
        size_t lino = 0;
        cstring_t *line = new_cstring();
-       while(cstring_readline(line, infile)) {
-               lino++;
+       int write_ok = 1;
+       while(readline(book, line, infile, ++lino)) {
                if (!write_csv(outfile, book, line, lino)) {
-                       fprintf(stderr, "Failure to write CSV output file\n");
-                       fprintf(stderr, "Error on line %zu, field <%s>: %s\n", 
-                               book->err_line,book->err_field,book->err_mess);
-                       fwrite("\n", 1, 1, outfile);
-                       return 4;
+                       write_ok = 0;
+                       break;
                }
-
-               fwrite("\n", 1, 1, outfile);
        }
+       write_done();
        
        if (infile != stdin)
                fclose(infile);
        if (outfile != stdout)
                fclose(stdout);
+       
+       if (!write_ok || book->err_mess) {
+               if (!write_ok)
+                       fprintf(stderr, "Failure to write CSV output file\n");
+               else
+                       fprintf(stderr, "Failure to read data input file\n");
+               fprintf(stderr, "Error on line %zu, field <%s>: %s\n", 
+                       book->err_line,book->err_field,book->err_mess);
+               fwrite("\n", 1, 1, outfile);
+               
+               if (!write_ok)
+                       return 4;
+               return 5;
+       }
 
        return 0;
 }
 
 /* Private */
 
+// Flag a read failure on data line lino in book, with a copy of mess
+static void readline_fail(book_t *book, size_t lino, char *mess) {
+       cstring_t *tmp = new_cstring();
+       cstring_add(tmp, mess);
+       book->err_line = lino;
+       book->err_mess = cstring_convert(tmp);
+}
+
+// Read the next record (number lino) from infile into line.
+//
+// - normal mode: the work is delegated to cstring_readline, and its result
+//   is returned as is
+// - raw EBCDIC mode (book->raw_ebcdic): a record is exactly book->bytes
+//   bytes, read without looking for any line separator; line is
+//   NUL-terminated after the record
+//
+// In raw EBCDIC mode, returns 1 if a full record was read and 0 otherwise:
+// either the input is finished (book->err_mess is left untouched) or the
+// read failed (book->err_mess and book->err_line are set).
+int readline(book_t *book, cstring_t *line, FILE *infile, size_t lino) {
+       if (!book->raw_ebcdic)
+               return cstring_readline(line, infile);
+
+       char mess[128];
+
+       // A 0-byte record can always be "read" and never reaches the end of
+       // the input: refuse it or the caller would loop forever.
+       if (!book->bytes) {
+               snprintf(mess, sizeof(mess),
+                               "Read error: record size is 0 bytes");
+               readline_fail(book, lino, mess);
+               return 0;
+       }
+
+       cstring_clear(line);
+       cstring_grow_to(line, book->bytes + 1); // + 1: final '\0'
+       size_t count = fread(line->string, 1, book->bytes, infile);
+       if (count == book->bytes) {
+               line->length = count;
+               line->string[count] = '\0';
+               return 1;
+       }
+
+       // Nothing left at all: regular end of the input, not an error
+       if (!count && feof(infile))
+               return 0;
+
+       // Truncated last record, or actual I/O error
+       snprintf(mess, sizeof(mess), "Read error: %zu bytes expected, %zu read",
+                       book->bytes, count);
+       readline_fail(book, lino, mess);
+       return 0;
+}
+
 void help(char *program) {
        printf("CBook data conversion program\n");
        printf("\n");
index 27f8bf0b1b3009ed4681911edfc4fa4c16e3c564..b8309c0dafbde5ad3eb29925de8fee46db950024 100755 (executable)
@@ -177,6 +177,11 @@ skip:
                }
        }
        
+       book->bytes = 0;
+       array_loop(book->lines, line, line_t *) {
+               book->bytes += (*line)->bytes;
+       }
+
        reorder_lines(book);
 
        return 1;
index 8a3dcb4e6495ca5a0fd8846a4e4dc6ed52e90136..6e1a12739841bc4f7d4e5a2fc8b36d72fc0e8cb2 160000 (submodule)
@@ -1 +1 @@
-Subproject commit 8a3dcb4e6495ca5a0fd8846a4e4dc6ed52e90136
+Subproject commit 6e1a12739841bc4f7d4e5a2fc8b36d72fc0e8cb2