diff options
author | Jakob Kaivo <jkk@ung.org> | 2024-05-31 16:45:22 -0400 |
---|---|---|
committer | Jakob Kaivo <jkk@ung.org> | 2024-05-31 16:45:22 -0400 |
commit | 709223a2f015966cd1c15b15dc4f7a56784ceac2 (patch) | |
tree | 3bb0f426c870e49a09523c89d191c806871fc1c3 | |
parent | 55d2e06ec5afee20d56fc8a48d4c4d75e9afdbe0 (diff) |
initial implementation of __scanf()
-rw-r--r-- | mk/__conv.d | 9 | ||||
-rw-r--r-- | mk/__scanf.d | 1 | ||||
-rw-r--r-- | mk/all.mk | 1 | ||||
-rw-r--r-- | mk/deps.mk | 4 | ||||
-rw-r--r-- | src/stdio/__conv.c | 114 | ||||
-rw-r--r-- | src/stdio/__scanf.c | 237 | ||||
-rw-r--r-- | src/stdio/_stdio.h | 31 |
7 files changed, 393 insertions, 4 deletions
diff --git a/mk/__conv.d b/mk/__conv.d new file mode 100644 index 00000000..44f2589c --- /dev/null +++ b/mk/__conv.d @@ -0,0 +1,9 @@ +libc_C.0: libc.a(__conv.o) +libc.a(__conv.o): $(OBJDIR)/__conv.o + @$(AR) $(ARFLAGS) $@ $(OBJDIR)/$% + +$(OBJDIR)/__conv.o: src/stdio/__conv.c +$(OBJDIR)/__conv.o: +$(OBJDIR)/__conv.o: + @mkdir -p $(@D) + $(CC) -c -o $@ $(CFLAGS) src/stdio/__conv.c diff --git a/mk/__scanf.d b/mk/__scanf.d index 84677c9c..930313ea 100644 --- a/mk/__scanf.d +++ b/mk/__scanf.d @@ -4,6 +4,7 @@ libc.a(__scanf.o): $(OBJDIR)/__scanf.o $(OBJDIR)/__scanf.o: src/stdio/__scanf.c $(OBJDIR)/__scanf.o: src/stdio/_stdio.h +$(OBJDIR)/__scanf.o: src/_safety.h $(OBJDIR)/__scanf.o: @mkdir -p $(@D) $(CC) -c -o $@ $(CFLAGS) src/stdio/__scanf.c @@ -286,6 +286,7 @@ include mk/setvbuf.d include mk/sscanf.d include mk/fclose.d include mk/fgets.d +include mk/__conv.d include mk/vsscanf_s.d include mk/__stderr.d include mk/fgetc.d @@ -1154,6 +1154,10 @@ all: mk/fgets.d mk/fgets.d: src/stdio/fgets.c sh mk/deps.sh src/stdio/fgets.c +all: mk/__conv.d +mk/__conv.d: src/stdio/__conv.c + sh mk/deps.sh src/stdio/__conv.c + all: mk/vsscanf_s.d mk/vsscanf_s.d: src/stdio/vsscanf_s.c sh mk/deps.sh src/stdio/vsscanf_s.c diff --git a/src/stdio/__conv.c b/src/stdio/__conv.c new file mode 100644 index 00000000..68d86d54 --- /dev/null +++ b/src/stdio/__conv.c @@ -0,0 +1,114 @@ +#include <inttypes.h> +#include <ctype.h> +#include <string.h> +#include "_conversion.h" + +/* +struct io_conversion { + enum { IO_IN, IO_OUT } dir; + enum { + F_STAR = (1<<0), + F_LEFT = (1<<1), + F_SIGN = (1<<2), + F_SPACE = (1<<3), + F_ALT = (1<<4), + F_ZERO = (1<<4), + } flags; + enum { + L_default, + L_hh, + L_h, + L_l, + L_ll, + L_j, + L_z, + L_t, + L_L, + } length; + int has_width:1; + int has_precision:1; + uintmax_t width; + uintmax_t precision; + char spec; +}; +*/ + +size_t __conv(const char *format, struct io_conversion *conv) +{ + size_t ret = 0; + + if (format[0] != '%') { + return 0; + } + + ret++; + + conv->flags = 0; + conv->length = L_default; + conv->has_width = 0; + conv->has_precision = 0; + + while (strchr("*-+ #0", format[ret])) { + switch (format[ret]) { + case '*': conv->flags |= F_STAR; break; + case '-': conv->flags |= F_LEFT; break; + case '+': conv->flags |= F_SIGN; break; + case ' ': conv->flags |= F_SPACE; break; + case '#': conv->flags |= F_ALT; break; + case '0': conv->flags |= F_ZERO; break; + } + /* check for invalid input flags (only * is allowed) */ + ret++; + } + + if (isdigit(format[ret])) { + char *end = NULL; + conv->has_width = 1; + conv->width = strtoumax(format + ret, &end, 10); + ret += (size_t)(end - (format + ret)); + } + + /* TODO: precision */ + + if (strchr("hljztL", format[ret])) { + switch (format[ret]) { + case 'h': + if (format[ret + 1] == 'h') { + ret++; + conv->length = L_hh; + } else { + conv->length = L_h; + } + break; + case 'l': + if (format[ret + 1] == 'l') { + ret++; + conv->length = L_ll; + } else { + conv->length = L_l; + } + break; + case 'j': + conv->length = L_j; + break; + case 'z': + conv->length = L_z; + break; + case 't': + conv->length = L_t; + break; + case 'L': + conv->length = L_L; + break; + default: + break; + } + ret++; + } + + /* TODO: validate */ + + conv->spec = format[ret]; + ret++; + return ret; +} diff --git a/src/stdio/__scanf.c b/src/stdio/__scanf.c index ed837b44..3252083a 100644 --- a/src/stdio/__scanf.c +++ b/src/stdio/__scanf.c @@ -1,16 +1,245 @@ +#include <ctype.h> +#include <inttypes.h> +#include <limits.h> #include <stdarg.h> #include <stdio.h> +#include <stdlib.h> +#include <string.h> #include "_stdio.h" +#include "_safety.h" + +#pragma GCC diagnostic ignored "-Wint-conversion" +#pragma GCC diagnostic ignored "-Wtype-limits" + +#define ASSIGN(__t, __arg_list, __val, __min, __max) do { \ + if ((__min != 0 && (__val) < (__min)) || (__val) > (__max)) { \ + UNDEFINED("Assignment to %s would overflow", #__t); \ + } \ + __t *__ptr = va_arg(__arg_list, __t *); \ + *__ptr = (__t)(__val); \ + } while (0) + +static int __unget(struct io_options *opt, int c) +{ + if (opt->stream) { + return ungetc(c, opt->stream); + } + return opt->string[--(opt->pos)] = c; +} + +static int __get(struct io_options *opt) +{ + if (opt->stream) { + return fgetc(opt->stream); + } + return opt->string[opt->pos++]; +} + +GCC_SSE_HACK +static uintmax_t __get_uint(struct io_options *opt, char basec) +{ + char buf[BUFSIZ] = {0}; + size_t pos = 0; + int base = 10; + char *end = NULL; + if (basec == 'x' || basec == 'X') { + base = 16; + } + + /* TODO: skip whitespace */ + while (1) { + int c = __get(opt); + if (c == EOF) { + buf[pos] = '\0'; + break; + } + buf[pos++] = c; + end = NULL; + strtoumax(buf, &end, base); + if (end && *end) { + break; + } + } + + end = NULL; + uintmax_t ret = strtoumax(buf, &end, base); + __unget(opt, *end); + return ret; +} + +GCC_SSE_HACK +static intmax_t __get_int(struct io_options *opt, char basec) +{ + char buf[BUFSIZ] = {0}; + size_t pos = 0; + int base = 0; + char *end = NULL; + if (basec == 'd') { + base = 10; + } else if (basec == 'o') { + base = 8; + } + + /* TODO: skip whitespace */ + while (1) { + int c = __get(opt); + if (c == EOF) { + buf[pos] = '\0'; + break; + } + buf[pos] = c; + strtoimax(buf, &end, base); + if (end && *end) { + break; + } + } + + intmax_t ret = strtoimax(buf, &end, base); + __unget(opt, *end); + return ret; +} int __scanf(struct io_options *opt, const char * format, va_list arg) { - (void)opt; - (void)format; + int ret = 0; + + SIGNAL_SAFE(0); + + if (opt->string) { + opt->pos = 0; + } + + while (*format) { + struct io_conversion conv = { .dir = IO_IN }; + + if (isspace(*format)) { + int c = 0; + + while (isspace(*format)) { + format++; + } + + while ((c = __get(opt)) != EOF) { + if (!isspace(c)) { + __unget(opt, c); + break; + } + } + + format++; + continue; + } + + if (*format != '%') { + int c = __get(opt); + if (c == *format) { + format++; + continue; + } + __unget(opt, c); + break; + } + + format += __conv(format, &conv); + + switch (conv.spec) { + case 'd': /* base 10 int */ + case 'i': /* unknown base int */ + case 'o': /* base 8 int */ + intmax_t i = __get_int(opt, conv.spec); + switch (conv.length) { + case L_hh: ASSIGN(signed char, arg, i, SCHAR_MIN, SCHAR_MAX); break; + case L_h: ASSIGN(short int, arg, i, SHRT_MIN, SHRT_MAX); break; + case L_l: ASSIGN(long int, arg, i, LONG_MIN, LONG_MAX); break; + case L_ll: ASSIGN(long long int, arg, i, LLONG_MIN, LLONG_MAX); break; + case L_j: ASSIGN(intmax_t, arg, i, INTMAX_MIN, INTMAX_MAX); break; + //case L_z: ASSIGN(signed size_t, arg, i, 0, 0); break; /* TODO!!! */ + //case L_t: ASSIGN(signed ptrdiff_t, arg, i, 0, 0); break; /* TODO!!! */ + default: ASSIGN(int, arg, i, INT_MIN, INT_MAX); break; + + /* case L_L: UNDEFINED(""); break; */ + } + break; + + case 'u': /* base 10 unsigned */ + case 'x': + case 'X': /* base 16 unsigned */ + uintmax_t u = __get_uint(opt, conv.spec); + switch (conv.length) { + case L_hh: ASSIGN(unsigned char, arg, u, 0, UCHAR_MAX); break; + case L_h: ASSIGN(unsigned short int, arg, u, 0, USHRT_MAX); break; + case L_l: ASSIGN(unsigned long int, arg, u, 0, ULONG_MAX); break; + case L_ll: ASSIGN(unsigned long long int, arg, u, 0, ULLONG_MAX); break; + case L_j: ASSIGN(uintmax_t, arg, u, 0, UINTMAX_MAX); break; + case L_z: ASSIGN(size_t, arg, u, 0, SIZE_MAX); break; + case L_t: ASSIGN(ptrdiff_t, arg, u, 0, PTRDIFF_MAX); break; + default: ASSIGN(unsigned int, arg, u, 0, UINT_MAX); break; + /* case L_L: UNDEFINED(""); break; */ + } + break; + + case 'a': + case 'A': + case 'e': + case 'E': + case 'f': + case 'F': + case 'g': + case 'G': + /* strtod */ + + case 'c': + /* width (default 1) characters */ + if (conv.has_width == 0) { + conv.width = 1; + } + break; + + case 's': + if (conv.length != L_default && conv.length != L_l) { + //__bad_length(length, 's'); + } + + char *str = va_arg(arg, char *); + + /* TODO: only use widht if conv.has_width == 1 */ + for (uintmax_t i = 0; i < conv.width; i++) { + int c = __get(opt); + if (isspace(c)) { + __unget(opt, c); + break; + } + if ((conv.flags & F_STAR) != F_STAR) { + str[i] = c; + } + } + break; + + case '[': + /* scanset */ + + case 'p': + /* previous printf("%p"); */ + + case 'n': + /* output a number */ + + case '%': + /* match % literal */ + break; + + default: + UNDEFINED("Unknown conversion specifier '%c'", *format); + break; + } + + format++; + } + (void)arg; - return 0; + return ret; } /* STDC(0) -SIGNAL_SAFE(0) */ diff --git a/src/stdio/_stdio.h b/src/stdio/_stdio.h index 80e25546..f363bd75 100644 --- a/src/stdio/_stdio.h +++ b/src/stdio/_stdio.h @@ -1,6 +1,7 @@ #ifndef ___STDIO_H__ #define ___STDIO_H__ +#include <inttypes.h> #include <stddef.h> #include <stdarg.h> #include <stdio.h> @@ -91,9 +92,39 @@ struct io_options { FILE *stream; /* NULL or the output stream */ int fd; /* -1 or the output file descriptor */ size_t maxlen; /* max number of bytes to write to string */ + size_t pos; /* current index in string */ int ret; /* return value */ }; +struct io_conversion { + enum { IO_IN, IO_OUT } dir; + enum { + F_STAR = (1<<0), + F_LEFT = (1<<1), + F_SIGN = (1<<2), + F_SPACE = (1<<3), + F_ALT = (1<<4), + F_ZERO = (1<<4), + } flags; + enum { + L_default, + L_hh, + L_h, + L_l, + L_ll, + L_j, + L_z, + L_t, + L_L, + } length; + int has_width:1; + int has_precision:1; + uintmax_t width; + uintmax_t precision; + char spec; +}; + +size_t __conv(const char *, struct io_conversion *); int __printf(struct io_options * restrict, const char * restrict, va_list); int __scanf(struct io_options * restrict, const char * restrict, va_list); |