From 5a8d69ea869636e29807955d621fa45d8a21a8d8 Mon Sep 17 00:00:00 2001
From: Jakob Kaivo
Date: Mon, 2 Sep 2019 16:57:47 -0400
Subject: flesh out skeleton of process

---
Review notes (this section is ignored by git-am and is not part of the
commit message):

- This patch text was recovered from a whitespace-collapsed, HTML-stripped
  copy. Indentation is restored with tabs; the header names after each
  "#include" and the "&macro" arguments in main.c are reconstructed from
  what the code uses and should be confirmed against the repository.
- cpp.y, define(): the result of hcreate() is now stored in `created`, so
  the hash table is created once instead of on every call.
- cpp.y, preprocess(): returns 0 instead of falling off the end of a
  function declared to return int.
- main.c, setversion(): the version is a long, so it is formatted with
  "%ldL" through snprintf() into a buffer with room to spare.
- main.c, getoptarg(): a trailing option with no argument (argv[i + 1] is
  NULL) is reported and the program exits, instead of handing NULL to
  strchr()/define()/include()/libpath().
- main.c, addobj()/addsource(): realloc() results are checked before the
  list pointer is replaced.
- main.c, addsource(): the derived object name is heap-allocated because
  addobj() keeps the pointer; the previous local array was gone by the time
  the list was read. An empty path no longer writes before the buffer.
- main.c, main(): the input stream is closed when tmpfile() fails, and each
  file's streams are closed at the end of its loop iteration.
- Hunk lengths and the diffstat below are updated for the added lines; the
  "index" blob ids of cpp.y and main.c still name the pre-review contents.

 .gitignore |  10 +++
 Makefile   |  42 ++++------
 cpp.h      |  11 +++
 cpp.y      | 175 +++++++++++++++++++++++++++++++++++++++++
 link.c     |   7 ++
 link.h     |   6 ++
 main.c     | 285 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 trigraph.c |  53 -------------
 trigraph.h |   6 ++
 trigraph.l |  44 +++++++++++
 version.h  |  14 ++++
 11 files changed, 574 insertions(+), 79 deletions(-)
 create mode 100644 .gitignore
 create mode 100644 cpp.h
 create mode 100644 cpp.y
 create mode 100644 link.c
 create mode 100644 link.h
 create mode 100644 main.c
 delete mode 100644 trigraph.c
 create mode 100644 trigraph.h
 create mode 100644 trigraph.l
 create mode 100644 version.h

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7f5d6d0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,10 @@
+cc
+c89
+c99
+lint
+cflow
+ctags
+*.o
+trigraph.c
+*.tab.c
+*.tab.h
diff --git a/Makefile b/Makefile
index 4e77ce4..1182d1b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,33 +1,23 @@
 .POSIX:
 
-.SILENT: all clean .util.mk
+CFLAGS=-g -D_POSIX_C_SOURCE=200809L
+YFLAGS=-d
+LIBS=-ly -ll
+PROGRAM=cc
+SYMLINKS=c89 c99 lint cflow ctags
+OBJECTS=main.o trigraph.o cpp.tab.o link.o
+GENERATED=trigraph.c *.tab.c *.tab.h
 
-CFLAGS=-std=gnu99 -D_XOPEN_SOURCE=700 -g
+all: $(PROGRAM) $(SYMLINKS)
 
-all: .util.mk
-	make -f .util.mk --no-print-directory
+$(SYMLINKS): $(PROGRAM)
+	ln -s $(PROGRAM) $@
 
-deps: clean .util.mk
+cpp.tab.c cpp.tab.h: cpp.y
+	$(YACC) $(YFLAGS) -p cpp -b cpp cpp.y
 
-clean:
-	[ -f .util.mk ] && make -k --no-print-directory -f .util.mk clean || true
-	rm -f .util.mk
+cc: $(OBJECTS)
+	$(CC) -o $@ $(OBJECTS) $(LIBS)
 
-.util.mk: . Makefile
-	printf '.POSIX:\n\n' > $@
-	printf '.SUFFIXES: .cat .msg\n\n' >> $@
-	printf 'default: all\n\n' >> $@
-	printf 'CFLAGS=$(CFLAGS)\n' >> $@
-	printf 'UTILITY=%s\n' "$$(basename -s .c $$(grep -l ^main *.c | head -n1))" >> $@
-	printf 'SOURCES=%s\n' "$$(ls -1 *.c | tr '\n' ' ')" >> $@
-	printf 'HEADERS=%s\n' "$$(ls -1 *.h 2>/dev/null | tr '\n' ' ')" >> $@
-	printf 'OBJECTS=%s\n' "$$(ls -1 *.c | sed -e 's/\.c$$/.o/' | tr '\n' ' ')" >> $@
-	printf 'L10N=%s\n' "$$(ls -1 *.msg 2>/dev/null | sed -e 's/\.msg$$/\.cat/' | tr '\n' ' ')" >> $@
-	printf 'L11N=' >> $@
-	sed -ne '/^\/\*\*cat/,/cat\*\*\//p;' *.c | head -n1 | awk '{print $$2 ".cat"}' >> $@
-	printf '\n' >> $@
-	printf 'all: $$(UTILITY) $$(L10N)\n\n' >> $@
-	printf '$$(UTILITY): $$(OBJECTS) $$(HEADERS)\n\n' >> $@
-	printf '.msg.cat:\n\tgencat $$@ $$<\n\n' >> $@
-	printf ".c.cat:\n\tsed -ne '/^\/\*\*cat/,/cat\*\*\//p;' $$< | grep -v ^/ | grep -v ^\* | gencat \$$@ -\n\n" >> $@
-	printf 'clean:\n\trm -f *.o $$(L10N) $$(UTILITY)\n\n' >> $@
+clean:
+	rm -f $(PROGRAM) $(SYMLINKS) $(GENERATED) *.o
diff --git a/cpp.h b/cpp.h
new file mode 100644
index 0000000..9ee4769
--- /dev/null
+++ b/cpp.h
@@ -0,0 +1,11 @@
+#ifndef CPP_H
+#define CPP_H
+#include <stdio.h>
+
+int preprocess(FILE *in, FILE *out, long int version);
+void define(const char *macro, const char *replacement);
+void undef(const char *macro);
+int defined(const char *macro);
+void include(const char *path);
+
+#endif
diff --git a/cpp.y b/cpp.y
new file mode 100644
index 0000000..798e976
--- /dev/null
+++ b/cpp.y
@@ -0,0 +1,175 @@
+%{
+#define _XOPEN_SOURCE 700
+#include <errno.h>
+#include <search.h>
+#include <stdio.h>
+#include <string.h>
+#include "version.h"
+
+#define HASHSIZE 1024
+
+int cpplex(void);
+int cpperror(const char *s);
+%}
+
+%token IDENTIFIER
+%token LPAREN
+%token H_CHAR Q_CHAR
+%token DIGIT
+%token IF IFDEF IFNDEF ELIF ELSE ENDIF
+%token INCLUDE DEFINE UNDEF LINE ERROR PRAGMA
+%token OPERATOR PUNCTUATOR
+%token CHARACTER_CONSTANT STRING_LITERAL
+%token sign identifier_list constant_expression
+
+%start preprocessing_file
+
+%%
+
+preprocessing_file : /* empty */
+	| group
+	;
+
+group : group_part
+	| group group_part;
+
+group_part : new_line
+	| pp_tokens new_line
+	| if_section
+	| control_line
+	;
+
+if_section : if_group endif_line
+	| if_group elif_groups endif_line
+	| if_group else_group endif_line
+	| if_group elif_groups else_group endif_line
+	;
+
+if_group : '#' IF constant_expression new_line
+	| '#' IF constant_expression new_line group
+	| '#' IFDEF IDENTIFIER new_line
+	| '#' IFDEF IDENTIFIER new_line group
+	| '#' IFNDEF IDENTIFIER new_line
+	| '#' IFNDEF IDENTIFIER new_line group
+	;
+
+elif_groups : elif_group
+	| elif_groups elif_group
+	;
+
+elif_group : '#' ELIF constant_expression new_line
+	| '#' ELIF constant_expression new_line group
+	;
+
+else_group : '#' ELSE new_line
+	| '#' ELSE new_line group
+	;
+
+endif_line : '#' ENDIF new_line
+	;
+
+control_line : '#' INCLUDE pp_tokens new_line
+	| '#' DEFINE IDENTIFIER replacement_list new_line
+	| '#' DEFINE IDENTIFIER LPAREN ')' replacement_list new_line
+	| '#' DEFINE IDENTIFIER LPAREN identifier_list ')' replacement_list new_line
+	| '#' UNDEF IDENTIFIER new_line
+	| '#' LINE pp_tokens new_line
+	| '#' ERROR new_line
+	| '#' ERROR pp_tokens new_line
+	| '#' PRAGMA new_line
+	| '#' PRAGMA pp_tokens new_line
+	| '#' new_line
+	;
+
+/* LPAREN: '(' without preceding whitespace */
+
+replacement_list : /* empty */
+	| pp_tokens
+	;
+
+pp_tokens : preprocessing_token
+	| pp_tokens preprocessing_token
+	;
+
+new_line : '\n'
+	;
+
+preprocessing_token : header_name
+	| IDENTIFIER
+	| pp_number
+	| CHARACTER_CONSTANT
+	| STRING_LITERAL
+	| OPERATOR
+	| PUNCTUATOR
+	;
+
+header_name : '<' h_char_sequence '>'
+	| '"' q_char_sequence '"'
+	;
+
+h_char_sequence : H_CHAR
+	| h_char_sequence H_CHAR
+	;
+
+q_char_sequence : Q_CHAR
+	| q_char_sequence Q_CHAR
+	;
+
+pp_number : DIGIT
+	| '.' DIGIT
+	| pp_number DIGIT
+	/* | pp_number nondigit */
+	| pp_number 'e' sign
+	| pp_number 'E' sign
+	| pp_number '.'
+	;
+
+%%
+
+int cpplex(void)
+{
+	return 0;
+}
+
+int cpperror(const char *s)
+{
+	(void)s;
+	return 0;
+}
+
+void define(const char *macro, const char *replacement)
+{
+	static int created = 0;
+	if (!created) {
+		created = hcreate(HASHSIZE);
+	}
+	ENTRY e = {
+		.key = strdup(macro),
+		.data = strdup(replacement)
+	};
+	ENTRY *p = hsearch(e, ENTER);
+	if (p == NULL) {
+		cpperror(strerror(ENOMEM));
+	}
+	printf("replacing '%s' with '%s'\n", macro, replacement == (char*)-1 ? "INTERNAL" : replacement);
+}
+
+void undef(const char *macro)
+{
+	printf("undefining '%s'\n", macro);
+}
+
+void include(const char *path)
+{
+	printf("adding '%s' to INCLUDE path\n", path);
+}
+
+int preprocess(FILE *in, FILE *out, long int version)
+{
+	printf("preprocessing\n");
+	int c = 0;
+	while ((c = fgetc(in)) != EOF) {
+		fputc(c, out);
+	}
+	return 0;
+}
diff --git a/link.c b/link.c
new file mode 100644
index 0000000..a22983d
--- /dev/null
+++ b/link.c
@@ -0,0 +1,7 @@
+#include <stdio.h>
+#include "link.h"
+
+void libpath(const char *path)
+{
+	printf("adding '%s' to library path\n", path);
+}
diff --git a/link.h b/link.h
new file mode 100644
index 0000000..f86618d
--- /dev/null
+++ b/link.h
@@ -0,0 +1,6 @@
+#ifndef LINK_H
+#define LINK_H
+
+void libpath(const char *path);
+
+#endif
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..6ad8160
--- /dev/null
+++ b/main.c
@@ -0,0 +1,285 @@
+#define _XOPEN_SOURCE 700
+#include <errno.h>
+#include <libgen.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "cpp.h"
+#include "link.h"
+#include "trigraph.h"
+#include "version.h"
+
+static char **sources = NULL;
+size_t nsources = 0;
+
+static char **objects = NULL;
+size_t nobjects = 0;
+
+void compile(FILE *in, FILE *out, long int version)
+{
+	printf("compiling\n");
+}
+
+void assemble(FILE *in, FILE *out)
+{
+	printf("assembling\n");
+}
+
+long int setversion(const char *progname)
+{
+	long int version = LATEST;
+
+	define("__STDC__", "1");
+	/* define("__FILE__", argv[optind]); */
+	/* define("__DATE__", (char*)-1); */
+	/* define("__TIME__", (char*)-1); */
+
+	if (!strcmp(progname, "c18")) {
+		version = C18;
+	} else if (!strcmp(progname, "c11")) {
+		/* warn */
+		version = C11;
+	} else if (!strcmp(progname, "c99")) {
+		version = C99;
+	} else if (!strcmp(progname, "c89")) {
+		version = C95;
+	}
+
+	/* if (!defined("__STDC_VERSION__") { */
+		/* version = C89; */
+	/* } */
+
+	/* if (!defined("__STDC__")) { */
+		/* version = KNR; */
+	/* } */
+
+	if (version >= C95) {
+		/* version is a long: "%ld" matches it and snprintf bounds the write */
+		char ppversion[16] = "";
+		snprintf(ppversion, sizeof(ppversion), "%ldL", version);
+		define("__STDC_VERSION__", ppversion);
+	}
+
+	if (version >= C99) {
+		define("__STDC_HOSTED__", "1");
+		define("__STDC_IEC_559__", "1");
+		define("__STDC_IEC_559_COMPLEX__", "1");
+		define("__STDC_ISO_10646__", ISO_10646_VERSION);
+	}
+
+	if (version >= C11) {
+		define("__STDC_UTF_16__", "1");
+		define("__STDC_UTF_32__", "1");
+		define("__STDC_ANALYZABLE__", "1");
+		define("__STDC_LIB_EXT1__", "1");
+		/* __STDC_NO_ATOMICS__ */
+		/* __STDC_NO_COMPLEX__ */
+		/* __STDC_NO_THREADS__ */
+		/* __STDC_NO_VLA__ */
+	}
+
+	return version;
+}
+
+static int getoptarg(char *argv[], int i, char **arg)
+{
+	if (argv[i][2] != '\0') {
+		*arg = argv[i] + 2;
+		return 0;
+	}
+	/* argv[argc] is NULL: a trailing bare option has no argument to take */
+	if (argv[i + 1] == NULL) {
+		fprintf(stderr, "option %s requires an argument\n", argv[i]);
+		exit(1);
+	}
+	*arg = argv[i + 1];
+	return 1;
+}
+
+void addobj(char *path)
+{
+	char **grown = realloc(objects, sizeof(*objects) * (nobjects + 1));
+	if (grown == NULL) {
+		perror("realloc");
+		exit(1);
+	}
+	objects = grown;
+	objects[nobjects++] = path;
+
+	printf("adding '%s' to list of objects\n", path);
+}
+
+void addsource(char *path)
+{
+	char **grown = realloc(sources, sizeof(*sources) * (nsources + 1));
+	if (grown == NULL) {
+		perror("realloc");
+		exit(1);
+	}
+	sources = grown;
+	sources[nsources++] = path;
+
+	printf("adding '%s' to list of source files\n", path);
+
+	/* heap copy: addobj() keeps the pointer after this function returns */
+	char *obj = strdup(path);
+	if (obj == NULL) {
+		perror("strdup");
+		exit(1);
+	}
+	if (obj[0] != '\0') {
+		obj[strlen(obj) - 1] = 'o';
+	}
+	addobj(obj);
+}
+
+int main(int argc, char *argv[])
+{
+	int cpp_stdout = 0;
+	int skip_linking = 0;
+	char *progname = basename(argv[0]);
+	char *output = "a.out";
+
+	/* special cases for different program names */
+	if (!strcmp(progname, "lint")) {
+	} else if (!strcmp(progname, "cflow")) {
+	} else if (!strcmp(progname, "ctags")) {
+	}
+
+	for (int i = 1; i < argc; i++) {
+		if (!strcmp(argv[i], "-c")) {
+			skip_linking = 1;
+
+		} else if (!strcmp(argv[i], "-g")) {
+			/* include debugging symbols */
+
+		} else if (!strcmp(argv[i], "-s")) {
+			/* strip */
+
+		} else if (!strcmp(argv[i], "-E")) {
+			cpp_stdout = 1;
+
+		} else if (!strncmp(argv[i], "-o", 2)) {
+			i += getoptarg(argv, i, &output);
+
+		} else if (!strncmp(argv[i], "-D", 2)) {
+			char *macro = NULL;
+			i += getoptarg(argv, i, &macro);
+			char *eq = strchr(macro, '=');
+			if (eq) {
+				*eq = '\0';
+				eq++;
+			}
+			define(macro, eq ? eq : "1");
+
+		} else if (!strncmp(argv[i], "-I", 2)) {
+			char *path = NULL;
+			i += getoptarg(argv, i, &path);
+			include(path);
+
+		} else if (!strncmp(argv[i], "-L", 2)) {
+			char *path = NULL;
+			i += getoptarg(argv, i, &path);
+			libpath(path);
+
+		} else if (!strncmp(argv[i], "-O", 2)) {
+			/* optimize */
+
+		} else if (!strncmp(argv[i], "-U", 2)) {
+			char *macro = NULL;
+			i += getoptarg(argv, i, &macro);
+			undef(macro);
+
+		} else if (!strncmp(argv[i], "-l", 2)) {
+			char *lib = NULL;
+			i += getoptarg(argv, i, &lib);
+			addobj(lib);
+
+		} else if (argv[i][0] == '-') {
+			fprintf(stderr, "%s: unknown option %s\n", progname, argv[i]);
+			/* invalid option */
+		} else {
+			addsource(argv[i]);
+		}
+	}
+
+	long int version = setversion(progname);
+
+	for (size_t i = 0; i < nsources; i++) {
+		printf("compiling '%s'\n", sources[i]);
+		FILE *in = fopen(sources[i], "r");
+		if (in == NULL) {
+			fprintf(stderr, "%s: %s: %s\n", progname, sources[i], strerror(errno));
+			continue;
+		}
+		FILE *out = tmpfile();
+		if (out == NULL) {
+			fprintf(stderr, "%s: tmpfile(): %s\n", progname, strerror(errno));
+			fclose(in);
+			continue;
+		}
+
+		char *dot = strrchr(sources[i], '.');
+		if (!dot || strlen(dot) != 2) {
+			fprintf(stderr, "%s: don't know what to do with %s\n", progname, sources[i]);
+			return 1;
+		}
+
+		define("__FILE__", sources[i]);
+		switch (dot[1]) {
+		case 'c':
+			trigraph(in, out);
+
+			fclose(in);
+			in = out;
+			rewind(in);
+
+			if (cpp_stdout) {
+				out = stdout;
+			} else {
+				out = tmpfile();
+			}
+			preprocess(in, out, version);
+			if (cpp_stdout) {
+				break;
+			}
+
+			fclose(in);
+			in = out;
+			rewind(in);
+			out = tmpfile();
+			/* FALLTHRU */
+
+		case 'i':
+			compile(in, out, version);
+			/* if (-S) break; */
+
+			fclose(in);
+			in = out;
+			rewind(in);
+			out = tmpfile();
+			/* FALLTHRU */
+
+		case 's':
+			assemble(in, out);
+			break;
+
+		case 'a':
+		case 'o':
+			/* just link at the end */
+			break;
+
+		default:
+			fprintf(stderr, "%s: unknown file type %s\n", progname, sources[i]);
+		}
+
+		/* done with this file: close its streams, but leave stdout open for -E */
+		fclose(in);
+		if (out != NULL && out != stdout) {
+			fclose(out);
+		}
+	}
+
+	/* link(outfile, nobjects, objects); */
+}
diff --git a/trigraph.c b/trigraph.c
deleted file mode 100644
index b80b198..0000000
--- a/trigraph.c
+++ /dev/null
@@ -1,53 +0,0 @@
-#include <stdio.h>
-
-int main(int argc, char *argv[])
-{
-	FILE *in = stdin;
-
-	if (argc > 2) {
-		printf("usage: %s [file]\n", argv[0]);
-		return 1;
-	}
-
-	if (argc == 2) {
-		in = fopen(argv[1], "r");
-		if (in == NULL) {
-			perror(argv[1]);
-			return 1;
-		}
-	}
-
-	int c;
-	int q = 0;
-	while ((c = fgetc(in)) != EOF) {
-		if (q == 2) {
-			switch (c) {
-			case '=': putchar('#'); break;
-			case '/': putchar('\\'); break;
-			case '\'': putchar('^'); break;
-			case '(': putchar('['); break;
-			case ')': putchar(']'); break;
-			case '!': putchar('|'); break;
-			case '<': putchar('{'); break;
-			case '>': putchar('}'); break;
-			case '-': putchar('~'); break;
-			case '?': putchar('?'); break;
-
-			default:
-				printf("??%c", c);
-				break;
-			}
-
-			if (c != '?') {
-				q = 0;
-			}
-		} else if (c == '?') {
-			q++;
-		} else {
-			putchar(c);
-			q = 0;
-		}
-	}
-
-	return 0;
-}
diff --git a/trigraph.h b/trigraph.h
new file mode 100644
index 0000000..8390485
--- /dev/null
+++ b/trigraph.h
@@ -0,0 +1,6 @@
+#ifndef TRIGRAPH_H
+#define TRIGRAPH_H
+#include <stdio.h>
+
+int trigraph(FILE *in, FILE *out);
+#endif
diff --git a/trigraph.l b/trigraph.l
new file mode 100644
index 0000000..e9c4ca2
--- /dev/null
+++ b/trigraph.l
@@ -0,0 +1,44 @@
+%{
+#include <stdio.h>
+#include "trigraph.h"
+
+#define yylex tglex
+
+static FILE *tgout;
+static void replace_trigraph(int c);
+static size_t tgline = 0;
+static size_t tgchar = 0;
+%}
+
+%%
+
+\?\?[=\/'\(\)!<>\-] { replace_trigraph(yytext[2]); tgchar += 3; /* "???" is not a trigraph */ }
+\n { tgline++; tgchar = 0; fputc(yytext[0], tgout); }
+. { tgchar++; fputc(yytext[0], tgout); }
+
+%%
+
+static void replace_trigraph(int c)
+{
+	static int tg[] = {	/* indexed by the third character of the trigraph */
+		['='] = '#',
+		['/'] = '\\',
+		['\''] = '^',
+		['('] = '[',
+		[')'] = ']',
+		['!'] = '|',
+		['<'] = '{',	/* was mapped to '>' */
+		['>'] = '}',	/* entry was missing, so "??>" emitted a NUL byte */
+		['-'] = '~',
+	};
+	fprintf(stderr, "warning: replaced trigraph '??%c' with '%c' at __FILE__:%zu:%zu\n",
+		c, tg[c], /*__FILE__,*/ tgline, tgchar);
+	fputc(tg[c], tgout);
+}
+
+int trigraph(FILE *in, FILE *out)
+{
+	yyin = in;
+	tgout = out;
+	return tglex();
+}
diff --git a/version.h b/version.h
new file mode 100644
index 0000000..c5e4016
--- /dev/null
+++ b/version.h
@@ -0,0 +1,14 @@
+#ifndef VERSION_H
+#define VERSION_H
+
+#define C18 201710L
+#define C11 201112L
+#define C99 199901L
+#define C95 199409L
+#define C89 1L
+#define KNR 0L
+#define LATEST C18
+
+#define ISO_10646_VERSION "201910L"
+
+#endif
-- 
cgit v1.2.1