diff options
-rw-r--r-- | .gitignore | 10 | ||||
-rw-r--r-- | Makefile | 42 | ||||
-rw-r--r-- | cpp.h | 11 | ||||
-rw-r--r-- | cpp.y | 174 | ||||
-rw-r--r-- | link.c | 7 | ||||
-rw-r--r-- | link.h | 6 | ||||
-rw-r--r-- | main.c | 258 | ||||
-rw-r--r-- | trigraph.c | 53 | ||||
-rw-r--r-- | trigraph.h | 6 | ||||
-rw-r--r-- | trigraph.l | 44 | ||||
-rw-r--r-- | version.h | 14 |
11 files changed, 546 insertions, 79 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7f5d6d0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,10 @@ +cc +c89 +c99 +lint +cflow +ctags +*.o +trigraph.c +*.tab.c +*.tab.h @@ -1,33 +1,23 @@ .POSIX: -.SILENT: all clean .util.mk +CFLAGS=-g -D_POSIX_C_SOURCE=200809L +YFLAGS=-d +LIBS=-ly -ll +PROGRAM=cc +SYMLINKS=c89 c99 lint cflow ctags +OBJECTS=main.o trigraph.o cpp.tab.o link.o +GENERATED=trigraph.c *.tab.c *.tab.h -CFLAGS=-std=gnu99 -D_XOPEN_SOURCE=700 -g +all: $(PROGRAM) $(SYMLINKS) -all: .util.mk - make -f .util.mk --no-print-directory +$(SYMLINKS): $(PROGRAM) + ln -s $(PROGRAM) $@ -deps: clean .util.mk +cpp.tab.c cpp.tab.h: cpp.y + $(YACC) $(YFLAGS) -p cpp -b cpp cpp.y -clean: - [ -f .util.mk ] && make -k --no-print-directory -f .util.mk clean || true - rm -f .util.mk +cc: $(OBJECTS) + $(CC) -o $@ $(OBJECTS) $(LIBS) -.util.mk: . Makefile - printf '.POSIX:\n\n' > $@ - printf '.SUFFIXES: .cat .msg\n\n' >> $@ - printf 'default: all\n\n' >> $@ - printf 'CFLAGS=$(CFLAGS)\n' >> $@ - printf 'UTILITY=%s\n' "$$(basename -s .c $$(grep -l ^main *.c | head -n1))" >> $@ - printf 'SOURCES=%s\n' "$$(ls -1 *.c | tr '\n' ' ')" >> $@ - printf 'HEADERS=%s\n' "$$(ls -1 *.h 2>/dev/null | tr '\n' ' ')" >> $@ - printf 'OBJECTS=%s\n' "$$(ls -1 *.c | sed -e 's/\.c$$/.o/' | tr '\n' ' ')" >> $@ - printf 'L10N=%s\n' "$$(ls -1 *.msg 2>/dev/null | sed -e 's/\.msg$$/\.cat/' | tr '\n' ' ')" >> $@ - printf 'L11N=' >> $@ - sed -ne '/^\/\*\*cat/,/cat\*\*\//p;' *.c | head -n1 | awk '{print $$2 ".cat"}' >> $@ - printf '\n' >> $@ - printf 'all: $$(UTILITY) $$(L10N)\n\n' >> $@ - printf '$$(UTILITY): $$(OBJECTS) $$(HEADERS)\n\n' >> $@ - printf '.msg.cat:\n\tgencat $$@ $$<\n\n' >> $@ - printf ".c.cat:\n\tsed -ne '/^\/\*\*cat/,/cat\*\*\//p;' $$< | grep -v ^/ | grep -v ^\* | gencat \$$@ -\n\n" >> $@ - printf 'clean:\n\trm -f *.o $$(L10N) $$(UTILITY)\n\n' >> $@ +clean: + rm -f $(PROGRAM) $(SYMLINKS) $(GENERATED) *.o @@ -0,0 +1,11 @@ +#ifndef CPP_H +#define CPP_H +#include 
<stdio.h> + +int preprocess(FILE *in, FILE *out, long int version); +void define(const char *macro, const char *replacement); +void undef(const char *macro); +int defined(const char *macro); +void include(const char *path); + +#endif @@ -0,0 +1,174 @@ +%{ +#define _XOPEN_SOURCE 700 +#include <errno.h> +#include <search.h> +#include <stdio.h> +#include <string.h> +#include "version.h" + +#define HASHSIZE 1024 + +int cpplex(void); +int cpperror(const char *s); +%} + +%token IDENTIFIER +%token LPAREN +%token H_CHAR Q_CHAR +%token DIGIT +%token IF IFDEF IFNDEF ELIF ELSE ENDIF +%token INCLUDE DEFINE UNDEF LINE ERROR PRAGMA +%token OPERATOR PUNCTUATOR +%token CHARACTER_CONSTANT STRING_LITERAL +%token sign identifier_list constant_expression + +%start preprocessing_file + +%% + +preprocessing_file : /* empty */ + | group + ; + +group : group_part + | group group_part; + +group_part : new_line + | pp_tokens new_line + | if_section + | control_line + ; + +if_section : if_group endif_line + | if_group elif_groups endif_line + | if_group else_group endif_line + | if_group elif_groups else_group endif_line + ; + +if_group : '#' IF constant_expression new_line + | '#' IF constant_expression new_line group + | '#' IFDEF IDENTIFIER new_line + | '#' IFDEF IDENTIFIER new_line group + | '#' IFNDEF IDENTIFIER new_line + | '#' IFNDEF IDENTIFIER new_line group + ; + +elif_groups : elif_group + | elif_groups elif_group + ; + +elif_group : '#' ELIF constant_expression new_line + | '#' ELIF constant_expression new_line group + ; + +else_group : '#' ELSE new_line + | '#' ELSE new_line group + ; + +endif_line : '#' ENDIF new_line + ; + +control_line : '#' INCLUDE pp_tokens new_line + | '#' DEFINE IDENTIFIER replacement_list new_line + | '#' DEFINE IDENTIFIER LPAREN ')' replacement_list new_line + | '#' DEFINE IDENTIFIER LPAREN identifier_list ')' replacement_list new_line + | '#' UNDEF IDENTIFIER new_line + | '#' LINE pp_tokens new_line + | '#' ERROR new_line + | '#' ERROR pp_tokens 
new_line + | '#' PRAGMA new_line + | '#' PRAGMA pp_tokens new_line + | '#' new_line + ; + +/* LPAREN: '(' without preceding whitespace */ + +replacement_list : /* empty */ + | pp_tokens + ; + +pp_tokens : preprocessing_token + | pp_tokens preprocessing_token + ; + +new_line : '\n' + ; + +preprocessing_token : header_name + | IDENTIFIER + | pp_number + | CHARACTER_CONSTANT + | STRING_LITERAL + | OPERATOR + | PUNCTUATOR + ; + +header_name : '<' h_char_sequence '>' + | '"' q_char_sequence '"' + ; + +h_char_sequence : H_CHAR + | h_char_sequence H_CHAR + ; + +q_char_sequence : Q_CHAR + | q_char_sequence Q_CHAR + ; + +pp_number : DIGIT + | '.' DIGIT + | pp_number DIGIT + /* | pp_number nondigit */ + | pp_number 'e' sign + | pp_number 'E' sign + | pp_number '.' + ; + +%% + +int cpplex(void) +{ + return 0; +} + +int cpperror(const char *s) +{ + (void)s; + return 0; +} + +void define(const char *macro, const char *replacement) +{ + static int created = 0; + if (!created) { + hcreate(HASHSIZE); + } + ENTRY e = { + .key = strdup(macro), + .data = strdup(replacement) + }; + ENTRY *p = hsearch(e, ENTER); + if (p == NULL) { + cpperror(strerror(ENOMEM)); + } + printf("replacing '%s' with '%s'\n", macro, replacement == (char*)-1 ? 
"INTERNAL" : replacement); +} + +void undef(const char *macro) +{ + printf("undefining '%s'\n", macro); +} + +void include(const char *path) +{ + printf("adding '%s' to INCLUDE path\n", path); +} + +int preprocess(FILE *in, FILE *out, long int version) +{ + printf("preprocessing\n"); + int c = 0; + while ((c = fgetc(in)) != EOF) { + fputc(c, out); + } +} @@ -0,0 +1,7 @@ +#include <stdio.h> +#include "link.h" + +void libpath(const char *path) +{ + printf("adding '%s' to library path\n", path); +} @@ -0,0 +1,6 @@ +#ifndef LINK_H +#define LINK_H + +void libpath(const char *path); + +#endif @@ -0,0 +1,258 @@ +#define _XOPEN_SOURCE 700 +#include <errno.h> +#include <libgen.h> +#include <string.h> +#include <stdio.h> +#include <stdlib.h> + +#include "cpp.h" +#include "link.h" +#include "trigraph.h" +#include "version.h" + +static char **sources = NULL; +size_t nsources = 0; + +static char **objects = NULL; +size_t nobjects = 0; + +void compile(FILE *in, FILE *out, long int version) +{ + printf("compiling\n"); +} + +void assemble(FILE *in, FILE *out) +{ + printf("assembling\n"); +} + +long int setversion(const char *progname) +{ + long int version = LATEST; + + define("__STDC__", "1"); + /* define("__FILE__", argv[optind]); */ + /* define("__DATE__", (char*)-1); */ + /* define("__TIME__", (char*)-1); */ + + if (!strcmp(progname, "c18")) { + version = C18; + } else if (!strcmp(progname, "c11")) { + /* warn */ + version = C11; + } else if (!strcmp(progname, "c99")) { + version = C99; + } else if (!strcmp(progname, "c89")) { + version = C95; + } + + /* if (!defined("__STDC_VERSION__") { */ + /* version = C89; */ + /* } */ + + /* if (!defined("__STDC__")) { */ + /* version = KNR; */ + /* } */ + + if (version >= C95) { + char ppversion[8] = ""; + sprintf(ppversion, "%dL", version); + define("__STDC_VERSION__", ppversion); + } + + if (version >= C99) { + define("__STDC_HOSTED__", "1"); + define("__STDC_IEC_559__", "1"); + define("__STDC_IEC_559_COMPLEX__", "1"); + 
define("__STDC_ISO_10646__", ISO_10646_VERSION); + } + + if (version >= C11) { + define("__STDC_UTF_16__", "1"); + define("__STDC_UTF_32__", "1"); + define("__STDC_ANALYZABLE__", "1"); + define("__STDC_LIB_EXT1__", "1"); + /* __STDC_NO_ATOMICS__ */ + /* __STDC_NO_COMPLEX__ */ + /* __STDC_NO_THREADS__ */ + /* __STDC_NO_VLA__ */ + } + + return version; +} + +static int getoptarg(char *argv[], int i, char **arg) +{ + if (argv[i][2] != '\0') { + *arg = argv[i] + 2; + return 0; + } + *arg = argv[i + 1]; + return 1; +} + +void addobj(char *path) +{ + nobjects++; + objects = realloc(objects, sizeof(*objects) * nobjects); + objects[nobjects - 1] = path; + + printf("adding '%s' to list of objects\n", path); +} + +void addsource(char *path) +{ + nsources++; + sources = realloc(sources, sizeof(*sources) * nsources); + sources[nsources - 1] = path; + + printf("adding '%s' to list of source files\n", path); + + char obj[strlen(path) + 1]; + strcpy(obj, path); + obj[strlen(obj) - 1] = 'o'; + addobj(obj); +} + +int main(int argc, char *argv[]) +{ + int cpp_stdout = 0; + int skip_linking = 0; + char *progname = basename(argv[0]); + char *output = "a.out"; + + /* special cases for different program names */ + if (!strcmp(progname, "lint")) { + } else if (!strcmp(progname, "cflow")) { + } else if (!strcmp(progname, "ctags")) { + } + + for (int i = 1; i < argc; i++) { + if (!strcmp(argv[i], "-c")) { + skip_linking = 1; + + } else if (!strcmp(argv[i], "-g")) { + /* include debugging symbols */ + + } else if (!strcmp(argv[i], "-s")) { + /* strip */ + + } else if (!strcmp(argv[i], "-E")) { + cpp_stdout = 1; + + } else if (!strncmp(argv[i], "-o", 2)) { + i += getoptarg(argv, i, &output); + + } else if (!strncmp(argv[i], "-D", 2)) { + char *macro = NULL; + i += getoptarg(argv, i, &macro); + char *eq = strchr(macro, '='); + if (eq) { + *eq = '\0'; + eq++; + } + define(macro, eq ? 
eq : "1"); + + } else if (!strncmp(argv[i], "-I", 2)) { + char *path = NULL; + i += getoptarg(argv, i, &path); + include(path); + + } else if (!strncmp(argv[i], "-L", 2)) { + char *path = NULL; + i += getoptarg(argv, i, &path); + libpath(path); + + } else if (!strncmp(argv[i], "-O", 2)) { + /* optimize */ + + } else if (!strncmp(argv[i], "-U", 2)) { + char *macro = NULL; + i += getoptarg(argv, i, &macro); + undef(macro); + + } else if (!strncmp(argv[i], "-l", 2)) { + char *lib = NULL; + i += getoptarg(argv, i, &lib); + addobj(lib); + + } else if (argv[i][0] == '-') { + fprintf(stderr, "%s: unknown option %s\n", progname, argv[i]); + /* invalid option */ + } else { + addsource(argv[i]); + } + } + + long int version = setversion(progname); + + for (size_t i = 0; i < nsources; i++) { + printf("compiling '%s'\n", sources[i]); + FILE *in = fopen(sources[i], "r"); + if (in == NULL) { + fprintf(stderr, "%s: %s: %s\n", progname, sources[i], strerror(errno)); + continue; + } + FILE *out = tmpfile(); + if (out == NULL) { + fprintf(stderr, "%s: tmpfile(): %s\n", progname, strerror(errno)); + continue; + } + + char *dot = strrchr(sources[i], '.'); + if (!dot || strlen(dot) != 2) { + fprintf(stderr, "%s: don't know what to do with %s\n", progname, sources[i]); + return 1; + } + + define("__FILE__", sources[i]); + switch (dot[1]) { + case 'c': + trigraph(in, out); + + fclose(in); + in = out; + rewind(in); + + if (cpp_stdout) { + out = stdout; + } else { + out = tmpfile(); + } + preprocess(in, out, version); + if (cpp_stdout) { + break; + } + + fclose(in); + in = out; + rewind(in); + out = tmpfile(); + /* FALLTHRU */ + + case 'i': + compile(in, out, version); + /* if (-S) break; */ + + fclose(in); + in = out; + rewind(in); + out = tmpfile(); + /* FALLTHRU */ + + case 's': + assemble(in, out); + break; + + case 'a': + case 'o': + /* just link at the end */ + break; + + default: + fprintf(stderr, "%s: unknown file type %s\n", progname, sources[i]); + } + } + + /* link(outfile, 
nobjects, objects); */ +} diff --git a/trigraph.c b/trigraph.c deleted file mode 100644 index b80b198..0000000 --- a/trigraph.c +++ /dev/null @@ -1,53 +0,0 @@ -#include <stdio.h> - -int main(int argc, char *argv[]) -{ - FILE *in = stdin; - - if (argc > 2) { - printf("usage: %s [file]\n", argv[0]); - return 1; - } - - if (argc == 2) { - in = fopen(argv[1], "r"); - if (in == NULL) { - perror(argv[1]); - return 1; - } - } - - int c; - int q = 0; - while ((c = fgetc(in)) != EOF) { - if (q == 2) { - switch (c) { - case '=': putchar('#'); break; - case '/': putchar('\\'); break; - case '\'': putchar('^'); break; - case '(': putchar('['); break; - case ')': putchar(']'); break; - case '!': putchar('|'); break; - case '<': putchar('{'); break; - case '>': putchar('}'); break; - case '-': putchar('~'); break; - case '?': putchar('?'); break; - - default: - printf("??%c", c); - break; - } - - if (c != '?') { - q = 0; - } - } else if (c == '?') { - q++; - } else { - putchar(c); - q = 0; - } - } - - return 0; -} diff --git a/trigraph.h b/trigraph.h new file mode 100644 index 0000000..8390485 --- /dev/null +++ b/trigraph.h @@ -0,0 +1,6 @@ +#ifndef TRIGRAPH_H +#define TRIGRAPH_H +#include <stdio.h> + +int trigraph(FILE *in, FILE *out); +#endif diff --git a/trigraph.l b/trigraph.l new file mode 100644 index 0000000..e9c4ca2 --- /dev/null +++ b/trigraph.l @@ -0,0 +1,44 @@ +%{ +#include <stdio.h> +#include "trigraph.h" + +#define yylex tglex + +static FILE *tgout; +static void replace_trigraph(int c); +static size_t tgline = 0; +static size_t tgchar = 0; +%} + +%% + +\?\?[=\/'\(\)!<>\-?] { replace_trigraph(yytext[2]); } +\n { tgline++; tgchar = 0; fputc(yytext[0], tgout); } +. 
{ tgchar++; fputc(yytext[0], tgout); } + +%% + +static void replace_trigraph(int c) +{ + static int tg[] = { + ['='] = '#', + ['/'] = '\\', + ['\''] = '^', + ['('] = '[', + [')'] = ']', + ['!'] = '|', + ['<'] = '>', + ['-'] = '~', + ['?'] = '?', + }; + fprintf(stderr, "warning: replaced trigraph '??%c' with '%c' at __FILE__:%zd:%zd\n", + c, tg[c], /*__FILE__,*/ tgline, tgchar); + fputc(tg[c], tgout); +} + +int trigraph(FILE *in, FILE *out) +{ + yyin = in; + tgout = out; + return tglex(); +} diff --git a/version.h b/version.h new file mode 100644 index 0000000..c5e4016 --- /dev/null +++ b/version.h @@ -0,0 +1,14 @@ +#ifndef VERSION_H +#define VERSION_H + +#define C18 201710L +#define C11 201112L +#define C99 199901L +#define C95 199409L +#define C89 1L +#define KNR 0L +#define LATEST C18 + +#define ISO_10646_VERSION "201910L" + +#endif |