summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJakob Kaivo <jkk@ung.org>2019-09-02 16:57:47 -0400
committerJakob Kaivo <jkk@ung.org>2019-09-02 16:57:47 -0400
commit5a8d69ea869636e29807955d621fa45d8a21a8d8 (patch)
treeb3cceba43120ce5871ada08f78ea9fc086c4843e
parent39ecba0032be794a1f4d66f61e09e4910270330f (diff)
flesh out skeleton of process
-rw-r--r--.gitignore10
-rw-r--r--Makefile42
-rw-r--r--cpp.h11
-rw-r--r--cpp.y174
-rw-r--r--link.c7
-rw-r--r--link.h6
-rw-r--r--main.c258
-rw-r--r--trigraph.c53
-rw-r--r--trigraph.h6
-rw-r--r--trigraph.l44
-rw-r--r--version.h14
11 files changed, 546 insertions, 79 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..7f5d6d0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,10 @@
+cc
+c89
+c99
+lint
+cflow
+ctags
+*.o
+trigraph.c
+*.tab.c
+*.tab.h
diff --git a/Makefile b/Makefile
index 4e77ce4..1182d1b 100644
--- a/Makefile
+++ b/Makefile
@@ -1,33 +1,23 @@
.POSIX:
-.SILENT: all clean .util.mk
+CFLAGS=-g -D_POSIX_C_SOURCE=200809L
+YFLAGS=-d
+LIBS=-ly -ll
+PROGRAM=cc
+SYMLINKS=c89 c99 lint cflow ctags
+OBJECTS=main.o trigraph.o cpp.tab.o link.o
+GENERATED=trigraph.c *.tab.c *.tab.h
-CFLAGS=-std=gnu99 -D_XOPEN_SOURCE=700 -g
+all: $(PROGRAM) $(SYMLINKS)
-all: .util.mk
- make -f .util.mk --no-print-directory
+# Each alternate name is a symlink to the driver.  -f replaces a stale or
+# dangling link instead of failing with "File exists" on a rebuild.
+$(SYMLINKS): $(PROGRAM)
+	ln -sf $(PROGRAM) $@
-deps: clean .util.mk
+cpp.tab.c cpp.tab.h: cpp.y
+ $(YACC) $(YFLAGS) -p cpp -b cpp cpp.y
-clean:
- [ -f .util.mk ] && make -k --no-print-directory -f .util.mk clean || true
- rm -f .util.mk
+# Link the driver.  The target is spelled $(PROGRAM) so it stays in step with
+# the all, $(SYMLINKS) and clean rules if the program name ever changes.
+$(PROGRAM): $(OBJECTS)
+	$(CC) -o $@ $(OBJECTS) $(LIBS)
-.util.mk: . Makefile
- printf '.POSIX:\n\n' > $@
- printf '.SUFFIXES: .cat .msg\n\n' >> $@
- printf 'default: all\n\n' >> $@
- printf 'CFLAGS=$(CFLAGS)\n' >> $@
- printf 'UTILITY=%s\n' "$$(basename -s .c $$(grep -l ^main *.c | head -n1))" >> $@
- printf 'SOURCES=%s\n' "$$(ls -1 *.c | tr '\n' ' ')" >> $@
- printf 'HEADERS=%s\n' "$$(ls -1 *.h 2>/dev/null | tr '\n' ' ')" >> $@
- printf 'OBJECTS=%s\n' "$$(ls -1 *.c | sed -e 's/\.c$$/.o/' | tr '\n' ' ')" >> $@
- printf 'L10N=%s\n' "$$(ls -1 *.msg 2>/dev/null | sed -e 's/\.msg$$/\.cat/' | tr '\n' ' ')" >> $@
- printf 'L11N=' >> $@
- sed -ne '/^\/\*\*cat/,/cat\*\*\//p;' *.c | head -n1 | awk '{print $$2 ".cat"}' >> $@
- printf '\n' >> $@
- printf 'all: $$(UTILITY) $$(L10N)\n\n' >> $@
- printf '$$(UTILITY): $$(OBJECTS) $$(HEADERS)\n\n' >> $@
- printf '.msg.cat:\n\tgencat $$@ $$<\n\n' >> $@
- printf ".c.cat:\n\tsed -ne '/^\/\*\*cat/,/cat\*\*\//p;' $$< | grep -v ^/ | grep -v ^\* | gencat \$$@ -\n\n" >> $@
- printf 'clean:\n\trm -f *.o $$(L10N) $$(UTILITY)\n\n' >> $@
+clean:
+ rm -f $(PROGRAM) $(SYMLINKS) $(GENERATED) *.o
diff --git a/cpp.h b/cpp.h
new file mode 100644
index 0000000..9ee4769
--- /dev/null
+++ b/cpp.h
@@ -0,0 +1,11 @@
+#ifndef CPP_H
+#define CPP_H
+#include <stdio.h>
+/* Interface to the preprocessor stage; the functions are defined in cpp.y. */
+int preprocess(FILE *in, FILE *out, long int version); /* currently copies in to out byte for byte */
+void define(const char *macro, const char *replacement); /* enters macro and its replacement text in the macro hash table */
+void undef(const char *macro); /* currently only prints the macro name */
+int defined(const char *macro); /* NOTE(review): no definition is visible in this commit's cpp.y -- confirm before calling */
+void include(const char *path); /* currently only prints the path as an INCLUDE path entry */
+
+#endif
diff --git a/cpp.y b/cpp.y
new file mode 100644
index 0000000..798e976
--- /dev/null
+++ b/cpp.y
@@ -0,0 +1,174 @@
+%{
+#define _XOPEN_SOURCE 700
+#include <errno.h>
+#include <search.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "version.h"
+
+#define HASHSIZE 1024
+
+int cpplex(void);
+int cpperror(const char *s);
+%}
+
+%token IDENTIFIER
+%token LPAREN
+%token H_CHAR Q_CHAR
+%token DIGIT
+%token IF IFDEF IFNDEF ELIF ELSE ENDIF
+%token INCLUDE DEFINE UNDEF LINE ERROR PRAGMA
+%token OPERATOR PUNCTUATOR
+%token CHARACTER_CONSTANT STRING_LITERAL
+%token sign identifier_list constant_expression
+/* NOTE(review): sign, identifier_list and constant_expression are declared as tokens and have no rules below; they look like placeholders for nonterminals -- confirm */
+%start preprocessing_file
+
+%%
+/* Grammar for a preprocessing file: directives and token lines.  No rule carries a semantic action yet. */
+preprocessing_file : /* empty */
+ | group
+ ;
+
+group : group_part
+ | group group_part;
+
+group_part : new_line
+ | pp_tokens new_line
+ | if_section
+ | control_line
+ ;
+/* Conditional inclusion: one #if/#ifdef/#ifndef group, optional #elif groups, an optional #else group, then #endif. */
+if_section : if_group endif_line
+ | if_group elif_groups endif_line
+ | if_group else_group endif_line
+ | if_group elif_groups else_group endif_line
+ ;
+
+if_group : '#' IF constant_expression new_line
+ | '#' IF constant_expression new_line group
+ | '#' IFDEF IDENTIFIER new_line
+ | '#' IFDEF IDENTIFIER new_line group
+ | '#' IFNDEF IDENTIFIER new_line
+ | '#' IFNDEF IDENTIFIER new_line group
+ ;
+
+elif_groups : elif_group
+ | elif_groups elif_group
+ ;
+
+elif_group : '#' ELIF constant_expression new_line
+ | '#' ELIF constant_expression new_line group
+ ;
+
+else_group : '#' ELSE new_line
+ | '#' ELSE new_line group
+ ;
+
+endif_line : '#' ENDIF new_line
+ ;
+/* Non-conditional directives; a lone '#' followed by new_line is accepted as well. */
+control_line : '#' INCLUDE pp_tokens new_line
+ | '#' DEFINE IDENTIFIER replacement_list new_line
+ | '#' DEFINE IDENTIFIER LPAREN ')' replacement_list new_line
+ | '#' DEFINE IDENTIFIER LPAREN identifier_list ')' replacement_list new_line
+ | '#' UNDEF IDENTIFIER new_line
+ | '#' LINE pp_tokens new_line
+ | '#' ERROR new_line
+ | '#' ERROR pp_tokens new_line
+ | '#' PRAGMA new_line
+ | '#' PRAGMA pp_tokens new_line
+ | '#' new_line
+ ;
+
+/* LPAREN: '(' without preceding whitespace */
+
+replacement_list : /* empty */
+ | pp_tokens
+ ;
+
+pp_tokens : preprocessing_token
+ | pp_tokens preprocessing_token
+ ;
+
+new_line : '\n'
+ ;
+
+preprocessing_token : header_name
+ | IDENTIFIER
+ | pp_number
+ | CHARACTER_CONSTANT
+ | STRING_LITERAL
+ | OPERATOR
+ | PUNCTUATOR
+ ;
+
+header_name : '<' h_char_sequence '>'
+ | '"' q_char_sequence '"'
+ ;
+
+h_char_sequence : H_CHAR
+ | h_char_sequence H_CHAR
+ ;
+
+q_char_sequence : Q_CHAR
+ | q_char_sequence Q_CHAR
+ ;
+/* NOTE(review): the "pp_number nondigit" alternative is commented out, and the e/E alternatives depend on the placeholder token sign. */
+pp_number : DIGIT
+ | '.' DIGIT
+ | pp_number DIGIT
+ /* | pp_number nondigit */
+ | pp_number 'e' sign
+ | pp_number 'E' sign
+ | pp_number '.'
+ ;
+
+%%
+
+int cpplex(void)
+{
+ return 0; /* stub lexer: always returns 0 (the value a yacc parser reads as end of input), so no tokens are produced */
+}
+
+/*
+ * Error callback for the generated parser (yacc is run with -p cpp, so this
+ * takes the place of yyerror).  define() also reports its failures here.
+ * The message is written to stderr rather than being discarded.
+ *
+ * s: message text; a NULL pointer is reported as "unknown error".
+ *
+ * Returns 0 always.
+ */
+int cpperror(const char *s)
+{
+	fprintf(stderr, "cpp: %s\n", s != NULL ? s : "unknown error");
+	return 0;
+}
+
+/*
+ * Record a macro definition in the hsearch() table.
+ *
+ * macro:       macro name; copied.
+ * replacement: replacement text; copied, unless it is the (char*)-1 sentinel
+ *              for an internally computed macro, which is stored as is and
+ *              never dereferenced.
+ *
+ * The table is created on the first call only.  Defining a name that is
+ * already present replaces its stored text.  Failures are reported through
+ * cpperror() and leave the table unchanged.
+ */
+void define(const char *macro, const char *replacement)
+{
+	static int created = 0;
+	char *internal = (char *)-1;
+
+	if (!created) {
+		if (hcreate(HASHSIZE) == 0) {
+			cpperror(strerror(errno));
+			return;
+		}
+		/* hsearch() has a single global table; creating it again would be wrong */
+		created = 1;
+	}
+
+	char *key = strdup(macro);
+	char *data = (replacement == internal) ? internal : strdup(replacement);
+	if (key == NULL || data == NULL) {
+		free(key);
+		if (data != internal) {
+			free(data);
+		}
+		cpperror(strerror(ENOMEM));
+		return;
+	}
+
+	ENTRY e = {
+		.key = key,
+		.data = data
+	};
+	ENTRY *p = hsearch(e, ENTER);
+	if (p == NULL) {
+		free(key);
+		if (data != internal) {
+			free(data);
+		}
+		cpperror(strerror(ENOMEM));
+		return;
+	}
+
+	if (p->key != key) {
+		/*
+		 * The name was already present: hsearch() returned the existing
+		 * entry and ignored ours.  Keep its key and swap in the new text.
+		 */
+		free(key);
+		if ((char *)p->data != internal) {
+			free(p->data);
+		}
+		p->data = data;
+	}
+
+	printf("replacing '%s' with '%s'\n", macro, replacement == (char*)-1 ? "INTERNAL" : replacement);
+}
+
+/* Placeholder for removing a macro: for now it only announces the name on stdout. */
+void undef(const char *macro)
+{
+	fprintf(stdout, "undefining '%s'\n", macro);
+}
+
+/* Placeholder for extending the header search path: for now it only announces the directory on stdout. */
+void include(const char *path)
+{
+	fprintf(stdout, "adding '%s' to INCLUDE path\n", path);
+}
+
+/*
+ * Preprocessing stage.  For now it announces itself on stdout and copies in
+ * to out unchanged; version is accepted but not used yet.
+ *
+ * Returns 0 on success, or 1 if reading in or writing out failed.
+ */
+int preprocess(FILE *in, FILE *out, long int version)
+{
+	(void)version;
+
+	printf("preprocessing\n");
+	int c = 0;
+	while ((c = fgetc(in)) != EOF) {
+		if (fputc(c, out) == EOF) {
+			return 1;
+		}
+	}
+
+	/* EOF from fgetc() is either end of file or a read error */
+	return ferror(in) ? 1 : 0;
+}
diff --git a/link.c b/link.c
new file mode 100644
index 0000000..a22983d
--- /dev/null
+++ b/link.c
@@ -0,0 +1,7 @@
+#include <stdio.h>
+#include "link.h"
+
+/* Placeholder for extending the library search path: for now it only announces the directory on stdout. */
+void libpath(const char *path)
+{
+	fprintf(stdout, "adding '%s' to library path\n", path);
+}
diff --git a/link.h b/link.h
new file mode 100644
index 0000000..f86618d
--- /dev/null
+++ b/link.h
@@ -0,0 +1,6 @@
+#ifndef LINK_H
+#define LINK_H
+/* Interface to the link stage; libpath() is defined in link.c. */
+void libpath(const char *path); /* currently only prints the path as a library path entry */
+
+#endif
diff --git a/main.c b/main.c
new file mode 100644
index 0000000..6ad8160
--- /dev/null
+++ b/main.c
@@ -0,0 +1,258 @@
+#define _XOPEN_SOURCE 700
+#include <errno.h>
+#include <libgen.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "cpp.h"
+#include "link.h"
+#include "trigraph.h"
+#include "version.h"
+
+/* Input files named on the command line, in order; filled by addsource(). */
+static char **sources = NULL;
+static size_t nsources = 0;
+
+/* Everything destined for the linker (objects and -l names); filled by addobj(). */
+static char **objects = NULL;
+static size_t nobjects = 0;
+
+/* Compilation stage placeholder: reports itself on stdout; in, out and version are not used yet. */
+void compile(FILE *in, FILE *out, long int version)
+{
+	(void)in;
+	(void)out;
+	(void)version;
+	puts("compiling");
+}
+
+/* Assembly stage placeholder: reports itself on stdout; in and out are not used yet. */
+void assemble(FILE *in, FILE *out)
+{
+	(void)in;
+	(void)out;
+	puts("assembling");
+}
+
+/*
+ * Choose the language standard from the name the driver was invoked as and
+ * define the predefined macros that go with it.
+ *
+ * progname: basename of argv[0]; "c18", "c11", "c99" and "c89" select a
+ *           specific standard, any other name selects LATEST.
+ *
+ * Returns the selected version constant from version.h.
+ */
+long int setversion(const char *progname)
+{
+	long int version = LATEST;
+
+	define("__STDC__", "1");
+	/* define("__FILE__", argv[optind]); */
+	/* define("__DATE__", (char*)-1); */
+	/* define("__TIME__", (char*)-1); */
+
+	if (!strcmp(progname, "c18")) {
+		version = C18;
+	} else if (!strcmp(progname, "c11")) {
+		/* warn */
+		version = C11;
+	} else if (!strcmp(progname, "c99")) {
+		version = C99;
+	} else if (!strcmp(progname, "c89")) {
+		version = C95;
+	}
+
+	/* if (!defined("__STDC_VERSION__") { */
+	/* 	version = C89; */
+	/* } */
+
+	/* if (!defined("__STDC__")) { */
+	/* 	version = KNR; */
+	/* } */
+
+	if (version >= C95) {
+		/*
+		 * version is a long, so it needs %ld; passing it to %d is a
+		 * mismatched conversion.  The buffer is large enough for any
+		 * long plus the 'L' suffix, and snprintf() bounds the write.
+		 */
+		char ppversion[32] = "";
+		snprintf(ppversion, sizeof(ppversion), "%ldL", version);
+		define("__STDC_VERSION__", ppversion);
+	}
+
+	if (version >= C99) {
+		define("__STDC_HOSTED__", "1");
+		define("__STDC_IEC_559__", "1");
+		define("__STDC_IEC_559_COMPLEX__", "1");
+		define("__STDC_ISO_10646__", ISO_10646_VERSION);
+	}
+
+	if (version >= C11) {
+		define("__STDC_UTF_16__", "1");
+		define("__STDC_UTF_32__", "1");
+		define("__STDC_ANALYZABLE__", "1");
+		define("__STDC_LIB_EXT1__", "1");
+		/* __STDC_NO_ATOMICS__ */
+		/* __STDC_NO_COMPLEX__ */
+		/* __STDC_NO_THREADS__ */
+		/* __STDC_NO_VLA__ */
+	}
+
+	return version;
+}
+
+/*
+ * Fetch the argument of a two-character option such as -D or -o.
+ *
+ * argv: the program's argument vector (NULL-terminated)
+ * i:    index of the option being processed
+ * arg:  receives the argument text
+ *
+ * The argument is either attached ("-DFOO") or the following element
+ * ("-D FOO").  Returns the number of extra argv elements consumed (0 or 1)
+ * so the caller can add it to its index.  If the option is last on the
+ * command line and has no argument, prints a diagnostic and exits: callers
+ * go on to treat *arg as a string, and a NULL there would crash them.
+ */
+static int getoptarg(char *argv[], int i, char **arg)
+{
+	if (argv[i][2] != '\0') {
+		*arg = argv[i] + 2;
+		return 0;
+	}
+
+	if (argv[i + 1] == NULL) {
+		fprintf(stderr, "%s: option %s requires an argument\n", argv[0], argv[i]);
+		exit(1);
+	}
+
+	*arg = argv[i + 1];
+	return 1;
+}
+
+/*
+ * Append path to the list handed to the linker.
+ *
+ * path: object, archive or -l library name.  Only the pointer is stored, so
+ *       the string must stay valid for the rest of the run.
+ *
+ * Exits with a diagnostic if the list cannot be grown.
+ */
+void addobj(char *path)
+{
+	/* grow through a temporary so a failed realloc() does not lose the list */
+	char **grown = realloc(objects, sizeof(*objects) * (nobjects + 1));
+	if (grown == NULL) {
+		fprintf(stderr, "cc: %s\n", strerror(ENOMEM));
+		exit(1);
+	}
+	objects = grown;
+	objects[nobjects++] = path;
+
+	printf("adding '%s' to list of objects\n", path);
+}
+
+/*
+ * Append path to the list of inputs and queue the matching object name
+ * (path with its last character replaced by 'o') for the linker.
+ *
+ * path: input file name.  Only the pointer is stored in the source list, so
+ *       the string must stay valid for the rest of the run.
+ *
+ * Exits with a diagnostic if memory cannot be allocated.
+ */
+void addsource(char *path)
+{
+	/* grow through a temporary so a failed realloc() does not lose the list */
+	char **grown = realloc(sources, sizeof(*sources) * (nsources + 1));
+	if (grown == NULL) {
+		fprintf(stderr, "cc: %s\n", strerror(ENOMEM));
+		exit(1);
+	}
+	sources = grown;
+	sources[nsources++] = path;
+
+	printf("adding '%s' to list of source files\n", path);
+
+	/*
+	 * addobj() keeps the pointer it is given, so the object name has to
+	 * outlive this call: a local array would dangle after return.
+	 */
+	size_t len = strlen(path);
+	char *obj = malloc(len + 1);
+	if (obj == NULL) {
+		fprintf(stderr, "cc: %s\n", strerror(ENOMEM));
+		exit(1);
+	}
+	memcpy(obj, path, len + 1);
+	if (len > 0) {
+		/* an empty path has no last character to replace */
+		obj[len - 1] = 'o';
+	}
+	addobj(obj);
+}
+
+/*
+ * Finish one pipeline stage: close the consumed input, rewind the stage's
+ * output so it becomes the next input, and open the next output (stdout
+ * when to_stdout is nonzero, otherwise a fresh temporary file).
+ *
+ * Returns 0 on success, or -1 with *out set to NULL if no output could be
+ * opened.
+ */
+static int nextstage(FILE **in, FILE **out, int to_stdout)
+{
+	fclose(*in);
+	*in = *out;
+	rewind(*in);
+	*out = to_stdout ? stdout : tmpfile();
+	return *out != NULL ? 0 : -1;
+}
+
+/*
+ * Compiler driver entry point.
+ *
+ * Parses the command line (-c -g -s -E -o -D -I -L -O -U -l; any other
+ * argument not starting with '-' is an input file), selects the language
+ * version from the program name, then runs each input through the stages
+ * its one-letter suffix calls for:
+ *   .c  trigraph replacement, preprocessing, compilation, assembly
+ *   .i  compilation, assembly
+ *   .s  assembly
+ *   .a, .o  nothing (left for the link step, which is not implemented yet)
+ * With -E the pipeline stops after preprocessing and writes to stdout.
+ *
+ * Returns 0 if every input was processed, 1 if any input failed.  An input
+ * whose name has no one-letter suffix stops the run immediately.
+ */
+int main(int argc, char *argv[])
+{
+	int cpp_stdout = 0;
+	int skip_linking = 0;
+	int status = 0;
+	char *progname = basename(argv[0]);
+	char *output = "a.out";
+
+	/* special cases for different program names */
+	if (!strcmp(progname, "lint")) {
+	} else if (!strcmp(progname, "cflow")) {
+	} else if (!strcmp(progname, "ctags")) {
+	}
+
+	for (int i = 1; i < argc; i++) {
+		if (!strcmp(argv[i], "-c")) {
+			skip_linking = 1;
+
+		} else if (!strcmp(argv[i], "-g")) {
+			/* include debugging symbols */
+
+		} else if (!strcmp(argv[i], "-s")) {
+			/* strip */
+
+		} else if (!strcmp(argv[i], "-E")) {
+			cpp_stdout = 1;
+
+		} else if (!strncmp(argv[i], "-o", 2)) {
+			i += getoptarg(argv, i, &output);
+
+		} else if (!strncmp(argv[i], "-D", 2)) {
+			char *macro = NULL;
+			i += getoptarg(argv, i, &macro);
+			/* split NAME=VALUE in place; a bare NAME is defined as 1 */
+			char *eq = strchr(macro, '=');
+			if (eq) {
+				*eq = '\0';
+				eq++;
+			}
+			define(macro, eq ? eq : "1");
+
+		} else if (!strncmp(argv[i], "-I", 2)) {
+			char *path = NULL;
+			i += getoptarg(argv, i, &path);
+			include(path);
+
+		} else if (!strncmp(argv[i], "-L", 2)) {
+			char *path = NULL;
+			i += getoptarg(argv, i, &path);
+			libpath(path);
+
+		} else if (!strncmp(argv[i], "-O", 2)) {
+			/* optimize */
+
+		} else if (!strncmp(argv[i], "-U", 2)) {
+			char *macro = NULL;
+			i += getoptarg(argv, i, &macro);
+			undef(macro);
+
+		} else if (!strncmp(argv[i], "-l", 2)) {
+			char *lib = NULL;
+			i += getoptarg(argv, i, &lib);
+			addobj(lib);
+
+		} else if (argv[i][0] == '-') {
+			fprintf(stderr, "%s: unknown option %s\n", progname, argv[i]);
+			/* invalid option */
+		} else {
+			addsource(argv[i]);
+		}
+	}
+
+	long int version = setversion(progname);
+
+	for (size_t i = 0; i < nsources; i++) {
+		printf("compiling '%s'\n", sources[i]);
+		FILE *in = fopen(sources[i], "r");
+		if (in == NULL) {
+			fprintf(stderr, "%s: %s: %s\n", progname, sources[i], strerror(errno));
+			status = 1;
+			continue;
+		}
+		FILE *out = tmpfile();
+		if (out == NULL) {
+			fprintf(stderr, "%s: tmpfile(): %s\n", progname, strerror(errno));
+			fclose(in);
+			status = 1;
+			continue;
+		}
+
+		char *dot = strrchr(sources[i], '.');
+		if (!dot || strlen(dot) != 2) {
+			fprintf(stderr, "%s: don't know what to do with %s\n", progname, sources[i]);
+			fclose(in);
+			fclose(out);
+			return 1;
+		}
+
+		define("__FILE__", sources[i]);
+		switch (dot[1]) {
+		case 'c':
+			trigraph(in, out);
+			if (nextstage(&in, &out, cpp_stdout) != 0) {
+				break;
+			}
+
+			preprocess(in, out, version);
+			if (cpp_stdout) {
+				break;
+			}
+			if (nextstage(&in, &out, 0) != 0) {
+				break;
+			}
+			/* FALLTHRU */
+
+		case 'i':
+			compile(in, out, version);
+			/* if (-S) break; */
+			if (nextstage(&in, &out, 0) != 0) {
+				break;
+			}
+			/* FALLTHRU */
+
+		case 's':
+			assemble(in, out);
+			break;
+
+		case 'a':
+		case 'o':
+			/* just link at the end */
+			break;
+
+		default:
+			fprintf(stderr, "%s: unknown file type %s\n", progname, sources[i]);
+			status = 1;
+		}
+
+		/* out is NULL only when nextstage() could not open a temporary file */
+		if (out == NULL) {
+			fprintf(stderr, "%s: tmpfile(): %s\n", progname, strerror(errno));
+			status = 1;
+		}
+
+		/*
+		 * Release this input's streams.  The last stage's output is
+		 * dropped for now because nothing consumes it until linking
+		 * exists; stdout is left open for the next input.
+		 */
+		fclose(in);
+		if (out != NULL && out != stdout) {
+			fclose(out);
+		}
+	}
+
+	/* link(outfile, nobjects, objects); */
+	(void)skip_linking;
+	(void)output;
+	return status;
+}
diff --git a/trigraph.c b/trigraph.c
deleted file mode 100644
index b80b198..0000000
--- a/trigraph.c
+++ /dev/null
@@ -1,53 +0,0 @@
-#include <stdio.h>
-
-int main(int argc, char *argv[])
-{
- FILE *in = stdin;
-
- if (argc > 2) {
- printf("usage: %s [file]\n", argv[0]);
- return 1;
- }
-
- if (argc == 2) {
- in = fopen(argv[1], "r");
- if (in == NULL) {
- perror(argv[1]);
- return 1;
- }
- }
-
- int c;
- int q = 0;
- while ((c = fgetc(in)) != EOF) {
- if (q == 2) {
- switch (c) {
- case '=': putchar('#'); break;
- case '/': putchar('\\'); break;
- case '\'': putchar('^'); break;
- case '(': putchar('['); break;
- case ')': putchar(']'); break;
- case '!': putchar('|'); break;
- case '<': putchar('{'); break;
- case '>': putchar('}'); break;
- case '-': putchar('~'); break;
- case '?': putchar('?'); break;
-
- default:
- printf("??%c", c);
- break;
- }
-
- if (c != '?') {
- q = 0;
- }
- } else if (c == '?') {
- q++;
- } else {
- putchar(c);
- q = 0;
- }
- }
-
- return 0;
-}
diff --git a/trigraph.h b/trigraph.h
new file mode 100644
index 0000000..8390485
--- /dev/null
+++ b/trigraph.h
@@ -0,0 +1,6 @@
+#ifndef TRIGRAPH_H
+#define TRIGRAPH_H
+#include <stdio.h>
+/* Trigraph replacement stage; trigraph() is defined in trigraph.l. */
+int trigraph(FILE *in, FILE *out); /* copies in to out, replacing ??x trigraph sequences; returns the scanner's result */
+#endif
diff --git a/trigraph.l b/trigraph.l
new file mode 100644
index 0000000..e9c4ca2
--- /dev/null
+++ b/trigraph.l
@@ -0,0 +1,44 @@
+%{
+#include <stdio.h>
+#include "trigraph.h"
+
+#define yylex tglex
+
+static FILE *tgout;
+static void replace_trigraph(int c);
+static size_t tgline = 0;
+static size_t tgchar = 0;
+%}
+
+%%
+
+	/*
+	 * Only the nine sequences below are trigraphs.  "???" is not one, so
+	 * '?' is kept out of the character class: for "???=" the first '?'
+	 * is then copied by the catch-all rule and "??=" is replaced.
+	 */
+\?\?[=\/'\(\)!<>\-] { replace_trigraph(yytext[2]); }
+\n { tgline++; tgchar = 0; fputc(yytext[0], tgout); }
+. { tgchar++; fputc(yytext[0], tgout); }
+
+%%
+
+/*
+ * Write the replacement for the trigraph "??c" to tgout and warn on stderr.
+ *
+ * c: third character of the trigraph; the scanner rule only passes
+ *    characters that have an entry in the table.
+ *
+ * The position printed is the current value of the tgline/tgchar counters.
+ */
+static void replace_trigraph(int c)
+{
+	static const int tg[] = {
+		['='] = '#',
+		['/'] = '\\',
+		['\''] = '^',
+		['('] = '[',
+		[')'] = ']',
+		['!'] = '|',
+		['<'] = '{',
+		['>'] = '}',
+		['-'] = '~',
+	};
+	/* tgline and tgchar are size_t, which takes %zu */
+	fprintf(stderr, "warning: replaced trigraph '??%c' with '%c' at __FILE__:%zu:%zu\n",
+		c, tg[c], /*__FILE__,*/ tgline, tgchar);
+	fputc(tg[c], tgout);
+}
+
+/*
+ * Copy in to out, replacing trigraph sequences along the way.
+ *
+ * The position counters restart for every input, so warnings for a later
+ * file are not offset by the lines of the files scanned before it.
+ *
+ * Returns the value returned by the scanner (tglex()).
+ */
+int trigraph(FILE *in, FILE *out)
+{
+	yyin = in;
+	tgout = out;
+	tgline = 0;
+	tgchar = 0;
+	return tglex();
+}
diff --git a/version.h b/version.h
new file mode 100644
index 0000000..c5e4016
--- /dev/null
+++ b/version.h
@@ -0,0 +1,14 @@
+#ifndef VERSION_H
+#define VERSION_H
+/* Language-standard identifiers; main.c publishes values of C95 and above as __STDC_VERSION__. */
+#define C18 201710L
+#define C11 201112L
+#define C99 199901L
+#define C95 199409L
+#define C89 1L /* below C95, so no __STDC_VERSION__ is defined for it; only referenced from commented-out code in main.c */
+#define KNR 0L /* lowest value; only referenced from commented-out code in main.c */
+#define LATEST C18 /* used when the program name selects no specific standard */
+/* Replacement text for __STDC_ISO_10646__, which main.c defines for C99 and later. */
+#define ISO_10646_VERSION "201910L"
+
+#endif