From fd528d27b11f1d85dbf6afcd65b6bb4140e56070 Mon Sep 17 00:00:00 2001 From: Jakob Kaivo Date: Thu, 8 Sep 2022 11:11:06 -0400 Subject: handle predefined and command line macro definitions --- Makefile | 56 +++++-- as/Makefile | 11 -- as/as.l | 25 --- as/as.y | 36 ----- as/x86.h | 38 ----- c89.l | 95 ----------- c89.y | 525 ------------------------------------------------------------ c99.l | 15 -- c99.y | 29 ---- cc.c | 157 ++++++++++++++++++ cc.h | 7 + cpp.c | 91 +++++++++++ cpp.h | 18 ++- cpp.y | 174 -------------------- main.c | 258 ----------------------------- trigraph.h | 6 - trigraph.l | 44 ----- 17 files changed, 307 insertions(+), 1278 deletions(-) delete mode 100644 as/Makefile delete mode 100644 as/as.l delete mode 100644 as/as.y delete mode 100644 as/x86.h delete mode 100644 c89.l delete mode 100644 c89.y delete mode 100644 c99.l delete mode 100644 c99.y create mode 100644 cc.c create mode 100644 cc.h create mode 100644 cpp.c delete mode 100644 cpp.y delete mode 100644 main.c delete mode 100644 trigraph.h delete mode 100644 trigraph.l diff --git a/Makefile b/Makefile index 1182d1b..e675b7e 100644 --- a/Makefile +++ b/Makefile @@ -1,23 +1,47 @@ .POSIX: -CFLAGS=-g -D_POSIX_C_SOURCE=200809L -YFLAGS=-d -LIBS=-ly -ll -PROGRAM=cc -SYMLINKS=c89 c99 lint cflow ctags -OBJECTS=main.o trigraph.o cpp.tab.o link.o -GENERATED=trigraph.c *.tab.c *.tab.h +# This Makefile was generated by maje +# See https://src.kaivo.net/dev/maje/ for more information +# Do not edit this Makefile by hand -all: $(PROGRAM) $(SYMLINKS) +CC=c99 +LD=$(CC) +CFLAGS=-Wall -Wextra -Wpedantic -Werror -g +LDFLAGS= +LDLIBS= +SRCDIR=. +OBJDIR=. +BINDIR=$(OBJDIR) +LIBDIR=$(OBJDIR) +DESTDIR=/usr/local -$(SYMLINKS): $(PROGRAM) - ln -s $(PROGRAM) $@ +all: $(BINDIR)/cc -cpp.tab.c cpp.tab.h: cpp.y - $(YACC) $(YFLAGS) -p cpp -b cpp cpp.y +clean: + rm -f $(BINDIR)/cc $(OBJDIR)/*.o -cc: $(OBJECTS) - $(CC) -o $@ $(OBJECTS) $(LIBS) +install: $(BINDIR)/cc + mkdir -p $(DESTDIR)/bin + cp $(BINDIR)/cc $(DESTDIR)/bin -clean: - rm -f $(PROGRAM) $(SYMLINKS) $(GENERATED) *.o +$(BINDIR)/cc: $(OBJDIR)/cc.o +$(OBJDIR)/cc.o: $(SRCDIR)/cc.h +$(OBJDIR)/cc.o: $(SRCDIR)/cc.c + @mkdir -p $(@D) + $(CC) $(CFLAGS) -o $@ -c $(SRCDIR)/cc.c + +$(BINDIR)/cc: $(OBJDIR)/link.o +$(OBJDIR)/link.o: $(SRCDIR)/link.h +$(OBJDIR)/link.o: $(SRCDIR)/link.c + @mkdir -p $(@D) + $(CC) $(CFLAGS) -o $@ -c $(SRCDIR)/link.c + +$(BINDIR)/cc: $(OBJDIR)/cpp.o +$(OBJDIR)/cpp.o: $(SRCDIR)/cc.h +$(OBJDIR)/cpp.o: $(SRCDIR)/cpp.c + @mkdir -p $(@D) + $(CC) $(CFLAGS) -o $@ -c $(SRCDIR)/cpp.c + +$(BINDIR)/cc: + @mkdir -p $(@D) + $(LD) $(LDFLAGS) -o $@ $(OBJDIR)/*.o $(LDLIBS) diff --git a/as/Makefile b/as/Makefile deleted file mode 100644 index 99fe9de..0000000 --- a/as/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -as: as.yy.o as.tab.o - c99 -o $@ as.yy.o as.tab.o -ly -ll - -as.yy.c: as.l as.tab.h - lex -t as.l > $@ - -as.tab.h as.tab.c: as.y - yacc -d -b as as.y - -clean: - rm -f as *.o as.yy.c as.tab.c as.tab.h diff --git a/as/as.l b/as/as.l deleted file mode 100644 index 059b7c3..0000000 --- a/as/as.l +++ /dev/null @@ -1,25 +0,0 @@ -%{ -#include -#include "as.tab.h" -%} - -NONDIGIT [_a-zA-Z] -DIGIT [0-9] -IDENTIFIERS [_a-zA-Z0-9] - -%x COMMENT - -%% - -{DIGIT}+ { yylval.n = strtoumax(yytext, NULL, 10); return NUMBER; } - -{NONDIGIT}{IDENTIFIERS}* { yylval.s = yytext; return TOKEN; } - -:|,|\. { return yytext[0]; } -\n { return NEWLINE; } - -; { BEGIN COMMENT; } -. ; -\n { BEGIN INITIAL; return NEWLINE; } - -. ; diff --git a/as/as.y b/as/as.y deleted file mode 100644 index ed8714f..0000000 --- a/as/as.y +++ /dev/null @@ -1,36 +0,0 @@ -%{ -#include -%} - -%union { - char *s; - uintmax_t n; -}; - -%token NUMBER -%token TOKEN -%token NEWLINE - -%% - -program - : /* empty */ - | instruction NEWLINE -; - -instruction - : bare_instruction - | TOKEN ':' bare_instruction -; - -bare_instruction - : TOKEN - | TOKEN operand - | TOKEN operand ',' operand - | TOKEN operand ',' operand ',' operand -; - -operand - : TOKEN - | NUMBER -; diff --git a/as/x86.h b/as/x86.h deleted file mode 100644 index 56bf639..0000000 --- a/as/x86.h +++ /dev/null @@ -1,38 +0,0 @@ -struct { -const char *mnemonic; -const char *opcode; -char x32; -char x64; -} x86_opcodes[] = { -{ "aaa", "37", 1, 0 }, - -{ "aad", "d5 0a", 1, 0 }, -{ "aad imm8", "db ib", 1, 0 }, - -{ "aam", "d4 0a", 1, 0 }, -{ "aam imm8", "d4 ib", 1, 0 }, - -{ "aas", "3f" , 1, 0 }, - -{ "adc al, imm8", "14 ib", 1, 1 }, -{ "adc ax, immm16", "15 iw", 1, 1 }, -{ "adc eax, imm32", "15 id", 1, 1 }, -{ "adc rax, imm32", "rex.w 15 id", 0, 1 }, -{ "adc r/m8, imm8", "80 /2 ib", 1, 1 }, -{ "adc r/m8*, imm8", "rex 80 /2 ib", 0, 1 }, -{ "adc r/m16, imm16", "81 /2 iw", 1, 1 }, -{ "adc r/m32, imm32", "81 /2 id", 1, 1 }, -{ "adc r/m64, imm32", "rex.w 81 /2 id", 0, 1 }, -{ "adc r/m16, imm8", "83 /2 ib", 1, 1 }, -{ "adc r/m32, imm8", "83 /2 ib", 1, 1 }, -{ "adc r/m64, imm8", "rex.w 83 /2 ib", 0, 1 }, -{ "adc r/m8, r8", "10 /r", 1, 1 }, -{ "adc r/m8*, r8*", "rex 10 /r", 0, 1 }, -{ "adc r/m16, r16", "11 /r", 1, 1 }, -{ "adc r/m32, r32", "11 /r", 1, 1 }, -{ "adc r/m64, r64", "rex.w 11 /r", 0, 1 }, -{ "adc r8, r/m8", "12 /r", 1, 1 }, -{ "adc r8*, r/m8*", "rex 12 /r", 0, 1 }, -{ "adc r16, r/m16", "13 /r", 1, 1 }, -{ "adc r32, r/m32", "13 /r", 1, 1 }, -{ "adc r64, r/m64", "rex.w 13 /r", 0, 1 }, diff --git a/c89.l b/c89.l deleted file mode 100644 index 3f16b44..0000000 --- a/c89.l +++ /dev/null @@ -1,95 +0,0 @@ -%{ - -%} - -DIGIT [0-9] -UPPER [A-Z] -LOWER [a-z] -NONDIGIT [_a-zA-Z] - -%% - /* keywords */ -"auto" { return AUTO; } -"break" { return BREAK; } -"case" { return CASE; } -"char" { return CHAR; } -"const" { return CONST; } -"continue" { return CONTINUE; } -"default" { return DEFAULT; } -"do" { return DO; } -"double" { return DOUBLE; } -"else" { return ELSE; } -"enum" { return ENUM; } -"extern" { return EXTERN; } -"float" { return FLOAT; } -"for" { return FOR; } -"goto" { return GOTO; } -"if" { return IF; } -"int" { return INT; } -"long" { return LONG; } -"register" { return REGISTER; } -"return" { return RETURN; } -"short" { return SHORT; } -"signed" { return SIGNED; } -"sizeof" { return SIZEOF; } -"static" { return STATIC; } -"struct" { return STRUCT; } -"switch" { return SWITCH; } -"typedef" { return TYPEDEF; } -"union" { return UNION; } -"unsigned" { return UNSIGNED; } -"void" { return VOID; } -"volatile" { return VOLATILE; } -"while" { return WHILE; } - - /* operators */ -"[" { return LBRACKET; } -"]" { return RBRACKET; } -"(" { return LPAREN; } -")" { return RPAREN; } -"." { return DOT; } -"->" { return ARROW; } -"++" { return INCREMENT; } -"--" { return DECREMENT; } -"&" { return AMPERSAND; } -"*" { return STAR; } -"+" { return PLUS; } -"-" { return MINUS; } -"~" { return TILDE; } -"!" { return BANG; } -"/" { return SLASH; } -"%" { return PERCENT; } -"<<" { return LSHIFT; } -">>" { return RSHIFT; } -"<" { return LESSTHAN; } -">" { return GREATERTHAN; } -"<=" { return LESSEQUAL; } -">=" { return GREATEREQUAL; } -"==" { return ISEQUAL; } -"!=" { return NOTEQUAL; } -"^" { return CARET; } -"|" { return PIPE; } -"&&" { return ANDAND; } -"||" { return OROR; } -"?" { return QUESTION; } -":" { return COLON; } -"=" { return EQUALS; } -"*=" { return STAREQUALS; } -"/=" { return SLASHEQUALS; } -"%=" { return PERCENTEQUALS; } -"+=" { return PLUSEQUALS; } -"-=" { return MINUSEQUALS; } -"<<=" { return LSHIFTEQUALS; } -">>=" { return RSHIFTEQUALS; } -"&=" { return ANDEQUALS; } -"^=" { return CARETEQUALS; } -"|=" { return PIPEEQUALS; } -"," { return COMMA; } -"#" { return HASH; } -"##" { return HASHHASH; } - - /* additional punctuators */ -"{" { return LBRACE; } -"}" { return RBRACE; } -";" { return SEMICOLON; } -"..." { return DOTDOTDOT; } diff --git a/c89.y b/c89.y deleted file mode 100644 index c4e7920..0000000 --- a/c89.y +++ /dev/null @@ -1,525 +0,0 @@ -%{ -#include - -int yylex(void); - -void yyerror(char *str) -{ - printf("WUT?: %s\n" str); -} -%} - -%union { -} - - /* keywords */ -%token AUTO BREAK CASE CHAR CONST CONTINUE DEFAULT DO DOUBLE ELSE ENUM EXTERN - FLOAT FOR GOTO IF INT LONG REGISTER RETURN SHORT SIGNED SIZEOF STATIC - STRUCT SWITCH TYPEDEF UNION UNSIGNED VOID VOLATILE WHILE - - /* operators */ -%token LBRACKET RBRACKET LPAREN RPAREN DOT ARROW INCREMENT DECREMENT AMPERSAND - STAR PLUS MINUS TILDE BANG SLASH PERCENT LSHIFT RSHIFT LESSTHAN - GREATERTHAN LESSEQUAL GREATEREQUAL ISEQUAL NOTEQUAL CARET PIPE ANDAND - OROR QUESTION COLON EQUALS STAREQUALS SLASHEQUALS PERCENTEQUALS - PLUSEQUALS MINUSEQUALS LSHIFTEQUALS RSHIFTEQUALS ANDEQUALS CARETEQUALS - PIPEEQUALS COMMA HASH HASHHASH - - /* additional punctuators */ -%token LBRACE RBRACE SEMICOLON DOTDOTDOT - -%% - -primary-expression: - identifier - | constant - | string-literal - | LPAREN expression RPAREN - ; - -postfix-expression: - primary-expression - | postfix-expression LBRACKET expression RBRACKET - | postfix-expression LPAREN argument-expression-list RPAREN - | postfix-expression LPRAEN RPAREN - | postfix-expression DOT identifier - | postfix-expression ARROW identifier - | postfix-expression INCREMENT - | postfix-expression DECREMENT - ; - -argument-expression-list: - assignment-expression - | argument-expression-list COMMA assignment-expression - ; - -unary-expression: - postfix-expression - | INCREMENT unary-expression - | DECREMENT unary-expression - | unary-operator cast-expression - | SIZEOF unary-expression - | SIZEOF LPAREN type-name RPAREN - ; - -unary-operator: - AMPERSAND - | STAR - | PLUS - | MINUS - | TILDE - | BANG - ; - -cast-expression: - unary-expression - | LPAREN type-name RPAREN cast-expression - ; - -multiplicative-expression: - cast-expression - | multiplicative-expression STAR cast-expression - | multiplicative-expression SLASH cast-expression - | multiplicative-expression PERCENT cast-expression - ; - -additive-expression: - multiplicative-expression - | additive-expression PLUS multiplicative-expression - | additive-expression MINUS multiplicative-expression - ; - -shift-expression: - additive-expression - | shift-expression LSHIFT additive-expression - | shift-expression RSHIFT additive-expression - ; - -relational-expression: - shift-expression - | relational-expression LESSTHAN shift-expression - | relational-expression GREATERTHAN shift-expression - | relational-expression LESSEQUAL shift-expression - | relational-expression GREATEREQUAL shift-expression - ; - -equality-expression: - relational-expression - | equality-expression ISEQUAL relational-expression - | equality-expression NOTEQUAL relational-expression - ; - -AND-expression: - equality-expression - | AND-expression AMPERSAND equality-expression - ; - -exclusive-OR-expression: - AND-expression - | exclusive-OR-expression CARET AND-expression - ; - -inclusive-OR-expression: - exclusive-OR-expression - | inclusive-OR-expression PIPE exclusive-OR-expression - ; - -logical-AND-expression: - inclusive-OR-expression - | logical-AND-expression ANDAND inclusive-OR-expression - ; - -logical-OR-expression: - logical-AND-expression - | logical-OR-expression OROR logical-AND-expression - ; - -conditional-expression: - logical-OR-expression - | logical-OR-expression QUESTION expression COLON conditional-expression - ; - -assignment-expression: - conditional-expression - | unary-expression assignment-operator assignment-expression - ; - -assignment-operator: - EQUALS - | STAREQUALS - | SLASHEQUALS - | PERCENTEQUALS - | PLUSEQUALS - | MINUSEQUALS - | LSHIFTEQUALS - | RSHIFTEQUALS - | ANDEQUALS - | CARETEQUALS - | PIPEEQUALS - ; - -expression: - assignment-expression - | expression COMMA assignment-expression - ; - -constant-expression: - conditional-expression - ; - -declaration: - declaration-specifiers SEMICOLON - | declaration-specifiers init-declarator-list SEMICOLON - ; - -declaration-specifiers: - storage-class-specifier - | storage-class-specifier declaration-specifiers - | type-specifier - | type-specifier declaration-specifiers - | type-qualifier - | type-qualifier declaration-specifiers - ; - -init-declarator-list: - init-declarataor - | init-declarator-list COMMA init-declarator - ; - -init-declarator: - declarator - | declarator EQUALS initializer - ; - -storage-class-specifier: - TYPEDEF - | EXTERN - | STATIC - | AUTO - | REGISTER - ; - -type-specifier: - VOID - | CHAR - | SHORT - | INT - | LONG - | FLOAT - | DOUBLE - | SIGNED - | UNSIGNED - | struct-or-union-specifier - | enum-specifier - | typedef-name - ; - -struct-or-union-specifier: - struct-or-union LBRACE struct-declaration-list RBRACE - | struct-or-union identifier LBRACE struct-declaration-list RBRACE - | struct-or-union identifier - ; - -struct-or-union: - STRUCT - | UNION - ; - -struct-declaration-list: - struct-declaration - | struct-declaration-list struct declaration - ; - -struct-declaration: - specifier-qualifier-list struct-declarator-list SEMICOLON - ; - -specifier-qualifier-list: - type-specifier - | type-specifier specifier-qualifier-list - | type-qualifier - | type-qualifier specifier-qualifier-list - ; - -struct-declarator-list: - struct-declarator - | struct-declarator-list COMMA struct-declarator - ; - -struct-declarator: - declarator - | COMMA constant-expression - | declarator COMMA constant-expression - ; - -enum-specifier: - ENUM LBRACE enumerator-list RBRACE - | ENUM identifier LBRACE enumerator-list RBRACE - | ENUM identifier - ; - -enumerator-list: - enumerator - | enumerator-list COMMA enumerator - ; - -enumerator: - enumeration-constant - | enumeration-constant EQUALS constant-expression - ; - -type-qualifier: - CONST - | VOID - ; - -declarator: - direct-declarator - | pointer direct-declarator - ; - -direct-declarator: - identifier - | LPAREN declarator RPAREN - | direct-declarator LBRACKET RBRACKET - | direct-declarator LBRACKET constant-expression RBRACKET - | direct-declarator LPAREN parameter-type-list RPAREN - | direct-declarator LPAREN RPAREN - | direct-declarator LPAREN identifier-list RPAREN - ; - -pointer: - STAR - | STAR type-qualifier-list - | STAR pointer - | STAR type-qualifier-list pointer - ; - -type-qualifier-list: - type-qualifier - | type-qualifier-list type-qualifier - ; - -parameter-type-list: - parameter-list - | parameter-list COMMA DOTDOTDOT - ; - -parameter-list: - parameter-declaration - | parameter-list COMMA parameter-declaration - ; - -parameter-declaration: - declaration-specifiers declarator - | declaration-specifiers - | declaration-specifiers abstract-declarator - ; - -identifier-list: - identifier - | identifier-list COMMA identifier - ; - -type-name: - specifier-qualifier-list - | specifier-qualifier-list abstract-declarator - ; - -abstract-declarator: - pointer - | direct-abstract-declarator - | point direct-abstract-declarator - ; - -direct-abstract-declarator: - LPAREN abstract-declarator RPAREN - | LBRACKET RBRACKET - | LBRACKET constant-expression RBRACKET - | direct-abstract-declarator LBRACKET RBRACKET - | direct-abstract-declarator LBRACKET constant-expression RBRACKET - | LPAREN RPAREN - | LPAREN parameter-type-list RPAREN - | direct-abstract-declarator LPAREN RPAREN - | direct-abstract-declarator LPAREN parameter-type-list RPAREN - ; - -typdef-name: - identifier - ; - -initializer: - assignment-expression - | LBRACE initializer-list RBRACE - | LBRACE initializer-list COMMA RBRACE - ; - -initializer-list: - initializer - | initializer-list COMMA initializer - ; - -statement: - labeled-statement - | compound-statement - | expression-statement - | selection-statement - | iteration-statement - | jump-statement - ; - -labeled-statement: - identifier COLON statement - | CASE constant-expression COLON statement - | DEFAULT COLON statement - ; - -compound-statement: - LBRACE RBRACE - | LBRACE declaration-list RBRACE - | LBRACE statement-list RBRACE - | LBRACE declaration-list statement-list RBRACE - ; - -declaration-list: - declaration - | declaration-list declaration - ; - -statement-list: - statement - | statement-list statement - ; - -expression-statement: - SEMICOLON - | expression SEMICOLON - ; - -selection-statement: - IF LPAREN expression RPAREN statement - | IF LPAREN expression RPAREN statement ELSE statement - | SWITCH LPAREN expression RPAREN statement - ; - -iteration-statement: - WHILE LPAREN expression RPAREN statement - | DO statement WHILE LPAREN expression RPAREN SEMICOLON - | FOR LPAREN SEMICOLON SEMICOLON RPAREN statement - | FOR LPAREN expression SEMICOLON SEMICOLON RPAREN statement - | FOR LPAREN SEMICOLON expression SEMICOLON RPAREN statement - | FOR LPAREN SEMICOLON SEMICOLON expression RPAREN statement - | FOR LPAREN expression SEMICOLON expression SEMICOLON RPAREN statement - | FOR LPAREN SEMICOLON expression SEMICOLON expression RPAREN statement - | FOR LPAREN expression SEMICOLON SEMICOLON expression RPAREN statement - | FOR LPAREN expression SEMICOLON expression SEMICOLON expression RPAREN statement - ; - -jump-statement: - GOTO identifier SEMICOLON - | CONTINUE SEMICOLON - | BREAK SEMICOLON - | RETURN SEMICOLON - | RETURN expression SEMICOLON - ; - -translation-unit: - external-declaration - | translation-unit external-declaration - ; - -external-declaration: - function-definition - | declaration - ; - -function-definition: - declarator compound-statement - | declaration-specifiers declarator compound-statement - | declarator declaration-list compound-statement - | declaration-specifiers declarator declaration-list compound-statement - ; - -preprocessing-file: - /* optional */ - | group - ; - -group: - group-part - | group group-part - ; - -group-part: - new-line - | pp-tokens new-line - | if-section - | control-line - ; - -if-section: - if-group endif-line - | if-group elif-groups endif-line - | if-group else-group endif-line - | if-group elif-groups else-group endif-line - ; - -if-group: - HASH IF constant-expression new-line - | HASH IF constant-expression new-line group - | HASH IFDEF identifier new-line - | HASH IFDEF identifier new-line group - | HASH IFNDEF identifier new-line - | HASH IFNDEF identifier new-line group - ; - -elif-groups: - elif-group - | elif-groups elif-group - ; - -elif-group: - HASH ELIF constant-expression new-line - | HASH ELIF constant-expression new-line group - ; - -else-group: - HASH ELSE new-line - | HASH ELSE new-line group - ; - -endif-line: - HASH ENDIF new-line - ; - -control-line: - HASH INCLUDE pp-tokens new-line - | HASH DEFINE identifier replacement-list new-line - | HASH DEFINE identifier lparen RPAREN replacement-list new-line - | HASH DEFINE identifier lparen identifier-list RPAREN replacement-list new-line - | HASH UNDEF identifier new-line - | HASH LINE pp-tokens new-line - | HASH ERROR new-line - | HASH ERROR pp-tokens new-line - | HASH PRAGMA new-line - | HASH PRAGMA pp-tokens new-line - | HASH new-line - ; - -lparen: - LPAREN /* without preceding white space */ - ; - -replacement-list: - /* optional */ - | pp-tokens - ; - -pp-tokens: - preprocessing-token - | pp-tokens preprocessing-token - ; - -new-line: - NEWLINE - ; diff --git a/c99.l b/c99.l deleted file mode 100644 index 184980d..0000000 --- a/c99.l +++ /dev/null @@ -1,15 +0,0 @@ -%{ -#include "c99.tab.h" -%} - -%% -; { printf ("SEMICOLON\n"); return SEMICOLON; } -"+" { printf ("PLUS\n"); return PLUS; } -"-" { printf ("MINUS\n"); return MINUS; } -int { printf ("INT\n"); return INT; } -"\".*\"" { printf ("string literal:%s\n", yytext); return STRING; } -[a-zA-z_]+ { printf ("identifier: %s\n", yytext); return IDENTIFIER; } -"/*[.]**/" { printf ("COMMENT\n"); return COMMENT; } -%% - -int yywrap (void) { return 0; } diff --git a/c99.y b/c99.y deleted file mode 100644 index 1788c02..0000000 --- a/c99.y +++ /dev/null @@ -1,29 +0,0 @@ -%{ -int yylex(void); -void yyerror(char const *); -%} - -%token IDENTIFIER COMMENT -%token STRING NUMBER - - /* C89 keywords */ -%token AUTO BREAK CASE CHAR CONST CONTINUE DEFAULT DO DOUBLE ELSE -%token ENUM EXTERN FLOAT FOR GOTO IF INT LONG REGISTER -%token RETURN SHORT SIGNED SIZEOF STATIC STRUCT SWITCH TYPEDEF UNION -%token UNSIGNED VOID VOLATILE WHILE - - /* Punctuation */ -%token PLUS MINUS STAR SLASH -%token COMMA SEMICOLON -%token QUESTION COLON -%token LPAREN RPAREN -%token LBRACE RBRACE -%token LBRACKET RBRACKET - - /* C99 keywords */ -%token INLINE RESTRICT _BOOL _COMPLEX _IMAGINARY - -%% - -input: %empty - ; diff --git a/cc.c b/cc.c new file mode 100644 index 0000000..71e179b --- /dev/null +++ b/cc.c @@ -0,0 +1,157 @@ +#define _XOPEN_SOURCE 700 +#include +#include +#include +#include +#include +#include + +#include "cc.h" +#include "cpp.h" +#include "link.h" +#include "version.h" + +void error(const char *file, uintmax_t line, const char *fmt, ...) +{ + if (file) { + fprintf(stderr, "%s:%ju: ", file, line); + } else if (line == 0) { + fprintf(stderr, ": "); + } else { + fprintf(stderr, ": "); + } + + va_list ap; + va_start(ap, fmt); + vfprintf(stderr, fmt, ap); + va_end(ap); + exit(1); +} + +static struct macro *do_predefs(const char *argv0) +{ + long int version = LATEST; + char *fullname = strdup(argv0); + char *progname = basename(fullname); + + struct macro *defs = define(NULL, NULL, 0, "__STDC__", "1"); + + if (!strcmp(progname, "c18")) { + version = C18; + } else if (!strcmp(progname, "c11")) { + /* warn */ + version = C11; + } else if (!strcmp(progname, "c99")) { + version = C99; + } else if (!strcmp(progname, "c89")) { + version = C95; + } + + if (version >= C95) { + char ppversion[8] = ""; + sprintf(ppversion, "%ldL", version); + define(defs, NULL, 0, "__STDC_VERSION__", ppversion); + } + + if (version >= C99) { + define(defs, NULL, 0, "__STDC_HOSTED__", "1"); + define(defs, NULL, 0, "__STDC_IEC_559__", "1"); + define(defs, NULL, 0, "__STDC_IEC_559_COMPLEX__", "1"); + define(defs, NULL, 0, "__STDC_ISO_10646__", ISO_10646_VERSION); + } + + if (version >= C11) { + define(defs, NULL, 0, "__STDC_UTF_16__", "1"); + define(defs, NULL, 0, "__STDC_UTF_32__", "1"); + define(defs, NULL, 0, "__STDC_ANALYZABLE__", "1"); + define(defs, NULL, 0, "__STDC_LIB_EXT1__", "1"); + /* __STDC_NO_ATOMICS__ */ + /* __STDC_NO_COMPLEX__ */ + /* __STDC_NO_THREADS__ */ + /* __STDC_NO_VLA__ */ + } + + return defs; +} + +static int getoptarg(char *argv[], int i, char **arg) +{ + if (argv[i][2] != '\0') { + *arg = argv[i] + 2; + return 0; + } + *arg = argv[i + 1]; + return 1; +} + +int main(int argc, char *argv[]) +{ + enum { PREPROCESSED, ASSEMBLY, OBJECT, BINARY } output = BINARY; + char *progname = basename(argv[0]); + char *output_path = "a.out"; + + struct macro *predef = do_predefs(argv[0]); + + for (int i = 1; i < argc; i++) { + if (!strcmp(argv[i], "-c")) { + output = OBJECT; + + } else if (!strcmp(argv[i], "-g")) { + /* include debugging symbols */ + + } else if (!strcmp(argv[i], "-s")) { + /* strip */ + + } else if (!strcmp(argv[i], "-E")) { + output = PREPROCESSED; + + } else if (!strncmp(argv[i], "-o", 2)) { + i += getoptarg(argv, i, &output_path); + + } else if (!strncmp(argv[i], "-D", 2)) { + char *macro = NULL; + i += getoptarg(argv, i, ¯o); + char *eq = strchr(macro, '='); + if (eq) { + *eq = '\0'; + eq++; + } + define(predef, NULL, 1, macro, eq ? eq : "1"); + + } else if (!strncmp(argv[i], "-I", 2)) { + char *path = NULL; + i += getoptarg(argv, i, &path); + //include(path); + + } else if (!strncmp(argv[i], "-L", 2)) { + char *path = NULL; + i += getoptarg(argv, i, &path); + //libpath(path); + + } else if (!strncmp(argv[i], "-O", 2)) { + /* optimize */ + + } else if (!strncmp(argv[i], "-U", 2)) { + char *macro = NULL; + i += getoptarg(argv, i, ¯o); + //undef(macro); + + } else if (!strncmp(argv[i], "-l", 2)) { + char *lib = NULL; + i += getoptarg(argv, i, &lib); + //addobj(lib); + + } else if (argv[i][0] == '-') { + fprintf(stderr, "%s: unknown option %s\n", progname, argv[i]); + /* invalid option */ + } else { + switch (output) { + case PREPROCESSED: + preprocess(argv[i], output_path, predef); + break; + default: + break; + } + } + } +} diff --git a/cc.h b/cc.h new file mode 100644 index 0000000..2b122a7 --- /dev/null +++ b/cc.h @@ -0,0 +1,7 @@ +#ifndef CC_H +#define CC_H +#include + +void error(const char *file, uintmax_t line, const char *fmt, ...); + +#endif diff --git a/cpp.c b/cpp.c new file mode 100644 index 0000000..aeebaa8 --- /dev/null +++ b/cpp.c @@ -0,0 +1,91 @@ +#define _XOPEN_SOURCE 700 +#include +#include +#include +#include + +#include "cc.h" +#include "cpp.h" + +static void dump(struct macro *list) +{ + for (struct macro *p = list; p != NULL; p = p->next) { + if (p->file) { + printf("%s:%ju: ", p->file, p->line); + } else if (p->line == 0) { + printf(": "); + } else { + printf(": "); + } + printf("'%s' => '%s'\n", p->identifier, p->replacement); + } +} + +int preprocess(const char *in, const char *out, struct macro *predefined) +{ + (void)in; (void)out; + + time_t now = time(NULL); + struct tm *tm = localtime(&now); + char compiletime[16] = { 0 }; + char compiledate[16] = { 0 }; + strftime(compiletime, sizeof(compiletime), "%T", tm); + strftime(compiledate, sizeof(compiledate), "%b %e %Y", tm); + + struct macro *perfile = define(NULL, NULL, 0, "__DATE__", compiledate); + define(perfile, NULL, 0, "__TIME__", compiletime); + define(perfile, NULL, 0, "__FILE__", in); + define(perfile, NULL, 0, "__LINE__", "0"); + + dump(predefined); + dump(perfile); + + return 0; +} + +static struct macro *do_define(struct macro *list, const char *file, uintmax_t line, const char *macro, const char *replacement) +{ + struct macro *def = list; + + /* TODO: replace existing thing */ + /* TODO: prevent redefining builtins */ + + if (def == NULL) { + def = calloc(1, sizeof(*def)); + list = def; + } else { + while (def->next != NULL) { + def = def->next; + } + def->next = calloc(1, sizeof(*def)); + def = def->next; + } + + if (def == NULL) { + return NULL; + } + + if (file) { + def->file = strdup(file); + if (file == NULL) { + return NULL; + } + } + def->line = line; + def->identifier = strdup(macro); + def->replacement = strdup(replacement); + if (def->identifier == NULL || def->replacement == NULL) { + return NULL; + } + + return list; +} + +struct macro *define(struct macro *list, const char *file, uintmax_t line, const char *macro, const char *replacement) +{ + list = do_define(list, file, line, macro, replacement); + if (list == NULL) { + error(file, line, "out of memory defining macro"); + } + return list; +} diff --git a/cpp.h b/cpp.h index 9ee4769..319ac7e 100644 --- a/cpp.h +++ b/cpp.h @@ -1,11 +1,17 @@ #ifndef CPP_H #define CPP_H -#include +#include -int preprocess(FILE *in, FILE *out, long int version); -void define(const char *macro, const char *replacement); -void undef(const char *macro); -int defined(const char *macro); -void include(const char *path); +struct macro { + char *file; + uintmax_t line; + char *identifier; + char *replacement; + struct macro *next; +}; + +int preprocess(const char *infile, const char *outfile, struct macro *predefined_list); +struct macro *define(struct macro *list, const char *file, uintmax_t line, const char *macro, const char *replacement); +struct macro *undef(struct macro *list, const char *macro); #endif diff --git a/cpp.y b/cpp.y deleted file mode 100644 index 798e976..0000000 --- a/cpp.y +++ /dev/null @@ -1,174 +0,0 @@ -%{ -#define _XOPEN_SOURCE 700 -#include -#include -#include -#include -#include "version.h" - -#define HASHSIZE 1024 - -int cpplex(void); -int cpperror(const char *s); -%} - -%token IDENTIFIER -%token LPAREN -%token H_CHAR Q_CHAR -%token DIGIT -%token IF IFDEF IFNDEF ELIF ELSE ENDIF -%token INCLUDE DEFINE UNDEF LINE ERROR PRAGMA -%token OPERATOR PUNCTUATOR -%token CHARACTER_CONSTANT STRING_LITERAL -%token sign identifier_list constant_expression - -%start preprocessing_file - -%% - -preprocessing_file : /* empty */ - | group - ; - -group : group_part - | group group_part; - -group_part : new_line - | pp_tokens new_line - | if_section - | control_line - ; - -if_section : if_group endif_line - | if_group elif_groups endif_line - | if_group else_group endif_line - | if_group elif_groups else_group endif_line - ; - -if_group : '#' IF constant_expression new_line - | '#' IF constant_expression new_line group - | '#' IFDEF IDENTIFIER new_line - | '#' IFDEF IDENTIFIER new_line group - | '#' IFNDEF IDENTIFIER new_line - | '#' IFNDEF IDENTIFIER new_line group - ; - -elif_groups : elif_group - | elif_groups elif_group - ; - -elif_group : '#' ELIF constant_expression new_line - | '#' ELIF constant_expression new_line group - ; - -else_group : '#' ELSE new_line - | '#' ELSE new_line group - ; - -endif_line : '#' ENDIF new_line - ; - -control_line : '#' INCLUDE pp_tokens new_line - | '#' DEFINE IDENTIFIER replacement_list new_line - | '#' DEFINE IDENTIFIER LPAREN ')' replacement_list new_line - | '#' DEFINE IDENTIFIER LPAREN identifier_list ')' replacement_list new_line - | '#' UNDEF IDENTIFIER new_line - | '#' LINE pp_tokens new_line - | '#' ERROR new_line - | '#' ERROR pp_tokens new_line - | '#' PRAGMA new_line - | '#' PRAGMA pp_tokens new_line - | '#' new_line - ; - -/* LPAREN: '(' without preceding whitespace */ - -replacement_list : /* empty */ - | pp_tokens - ; - -pp_tokens : preprocessing_token - | pp_tokens preprocessing_token - ; - -new_line : '\n' - ; - -preprocessing_token : header_name - | IDENTIFIER - | pp_number - | CHARACTER_CONSTANT - | STRING_LITERAL - | OPERATOR - | PUNCTUATOR - ; - -header_name : '<' h_char_sequence '>' - | '"' q_char_sequence '"' - ; - -h_char_sequence : H_CHAR - | h_char_sequence H_CHAR - ; - -q_char_sequence : Q_CHAR - | q_char_sequence Q_CHAR - ; - -pp_number : DIGIT - | '.' DIGIT - | pp_number DIGIT - /* | pp_number nondigit */ - | pp_number 'e' sign - | pp_number 'E' sign - | pp_number '.' - ; - -%% - -int cpplex(void) -{ - return 0; -} - -int cpperror(const char *s) -{ - (void)s; - return 0; -} - -void define(const char *macro, const char *replacement) -{ - static int created = 0; - if (!created) { - hcreate(HASHSIZE); - } - ENTRY e = { - .key = strdup(macro), - .data = strdup(replacement) - }; - ENTRY *p = hsearch(e, ENTER); - if (p == NULL) { - cpperror(strerror(ENOMEM)); - } - printf("replacing '%s' with '%s'\n", macro, replacement == (char*)-1 ? "INTERNAL" : replacement); -} - -void undef(const char *macro) -{ - printf("undefining '%s'\n", macro); -} - -void include(const char *path) -{ - printf("adding '%s' to INCLUDE path\n", path); -} - -int preprocess(FILE *in, FILE *out, long int version) -{ - printf("preprocessing\n"); - int c = 0; - while ((c = fgetc(in)) != EOF) { - fputc(c, out); - } -} diff --git a/main.c b/main.c deleted file mode 100644 index 6ad8160..0000000 --- a/main.c +++ /dev/null @@ -1,258 +0,0 @@ -#define _XOPEN_SOURCE 700 -#include -#include -#include -#include -#include - -#include "cpp.h" -#include "link.h" -#include "trigraph.h" -#include "version.h" - -static char **sources = NULL; -size_t nsources = 0; - -static char **objects = NULL; -size_t nobjects = 0; - -void compile(FILE *in, FILE *out, long int version) -{ - printf("compiling\n"); -} - -void assemble(FILE *in, FILE *out) -{ - printf("assembling\n"); -} - -long int setversion(const char *progname) -{ - long int version = LATEST; - - define("__STDC__", "1"); - /* define("__FILE__", argv[optind]); */ - /* define("__DATE__", (char*)-1); */ - /* define("__TIME__", (char*)-1); */ - - if (!strcmp(progname, "c18")) { - version = C18; - } else if (!strcmp(progname, "c11")) { - /* warn */ - version = C11; - } else if (!strcmp(progname, "c99")) { - version = C99; - } else if (!strcmp(progname, "c89")) { - version = C95; - } - - /* if (!defined("__STDC_VERSION__") { */ - /* version = C89; */ - /* } */ - - /* if (!defined("__STDC__")) { */ - /* version = KNR; */ - /* } */ - - if (version >= C95) { - char ppversion[8] = ""; - sprintf(ppversion, "%dL", version); - define("__STDC_VERSION__", ppversion); - } - - if (version >= C99) { - define("__STDC_HOSTED__", "1"); - define("__STDC_IEC_559__", "1"); - define("__STDC_IEC_559_COMPLEX__", "1"); - define("__STDC_ISO_10646__", ISO_10646_VERSION); - } - - if (version >= C11) { - define("__STDC_UTF_16__", "1"); - define("__STDC_UTF_32__", "1"); - define("__STDC_ANALYZABLE__", "1"); - define("__STDC_LIB_EXT1__", "1"); - /* __STDC_NO_ATOMICS__ */ - /* __STDC_NO_COMPLEX__ */ - /* __STDC_NO_THREADS__ */ - /* __STDC_NO_VLA__ */ - } - - return version; -} - -static int getoptarg(char *argv[], int i, char **arg) -{ - if (argv[i][2] != '\0') { - *arg = argv[i] + 2; - return 0; - } - *arg = argv[i + 1]; - return 1; -} - -void addobj(char *path) -{ - nobjects++; - objects = realloc(objects, sizeof(*objects) * nobjects); - objects[nobjects - 1] = path; - - printf("adding '%s' to list of objects\n", path); -} - -void addsource(char *path) -{ - nsources++; - sources = realloc(sources, sizeof(*sources) * nsources); - sources[nsources - 1] = path; - - printf("adding '%s' to list of source files\n", path); - - char obj[strlen(path) + 1]; - strcpy(obj, path); - obj[strlen(obj) - 1] = 'o'; - addobj(obj); -} - -int main(int argc, char *argv[]) -{ - int cpp_stdout = 0; - int skip_linking = 0; - char *progname = basename(argv[0]); - char *output = "a.out"; - - /* special cases for different program names */ - if (!strcmp(progname, "lint")) { - } else if (!strcmp(progname, "cflow")) { - } else if (!strcmp(progname, "ctags")) { - } - - for (int i = 1; i < argc; i++) { - if (!strcmp(argv[i], "-c")) { - skip_linking = 1; - - } else if (!strcmp(argv[i], "-g")) { - /* include debugging symbols */ - - } else if (!strcmp(argv[i], "-s")) { - /* strip */ - - } else if (!strcmp(argv[i], "-E")) { - cpp_stdout = 1; - - } else if (!strncmp(argv[i], "-o", 2)) { - i += getoptarg(argv, i, &output); - - } else if (!strncmp(argv[i], "-D", 2)) { - char *macro = NULL; - i += getoptarg(argv, i, ¯o); - char *eq = strchr(macro, '='); - if (eq) { - *eq = '\0'; - eq++; - } - define(macro, eq ? eq : "1"); - - } else if (!strncmp(argv[i], "-I", 2)) { - char *path = NULL; - i += getoptarg(argv, i, &path); - include(path); - - } else if (!strncmp(argv[i], "-L", 2)) { - char *path = NULL; - i += getoptarg(argv, i, &path); - libpath(path); - - } else if (!strncmp(argv[i], "-O", 2)) { - /* optimize */ - - } else if (!strncmp(argv[i], "-U", 2)) { - char *macro = NULL; - i += getoptarg(argv, i, ¯o); - undef(macro); - - } else if (!strncmp(argv[i], "-l", 2)) { - char *lib = NULL; - i += getoptarg(argv, i, &lib); - addobj(lib); - - } else if (argv[i][0] == '-') { - fprintf(stderr, "%s: unknown option %s\n", progname, argv[i]); - /* invalid option */ - } else { - addsource(argv[i]); - } - } - - long int version = setversion(progname); - - for (size_t i = 0; i < nsources; i++) { - printf("compiling '%s'\n", sources[i]); - FILE *in = fopen(sources[i], "r"); - if (in == NULL) { - fprintf(stderr, "%s: %s: %s\n", progname, sources[i], strerror(errno)); - continue; - } - FILE *out = tmpfile(); - if (out == NULL) { - fprintf(stderr, "%s: tmpfile(): %s\n", progname, strerror(errno)); - continue; - } - - char *dot = strrchr(sources[i], '.'); - if (!dot || strlen(dot) != 2) { - fprintf(stderr, "%s: don't know what to do with %s\n", progname, sources[i]); - return 1; - } - - define("__FILE__", sources[i]); - switch (dot[1]) { - case 'c': - trigraph(in, out); - - fclose(in); - in = out; - rewind(in); - - if (cpp_stdout) { - out = stdout; - } else { - out = tmpfile(); - } - preprocess(in, out, version); - if (cpp_stdout) { - break; - } - - fclose(in); - in = out; - rewind(in); - out = tmpfile(); - /* FALLTHRU */ - - case 'i': - compile(in, out, version); - /* if (-S) break; */ - - fclose(in); - in = out; - rewind(in); - out = tmpfile(); - /* FALLTHRU */ - - case 's': - assemble(in, out); - break; - - case 'a': - case 'o': - /* just link at the end */ - break; - - default: - fprintf(stderr, "%s: unknown file type %s\n", progname, sources[i]); - } - } - - /* link(outfile, nobjects, objects); */ -} diff --git a/trigraph.h b/trigraph.h deleted file mode 100644 index 8390485..0000000 --- a/trigraph.h +++ /dev/null @@ -1,6 +0,0 @@ -#ifndef TRIGRAPH_H -#define TRIGRAPH_H -#include - -int trigraph(FILE *in, FILE *out); -#endif diff --git a/trigraph.l b/trigraph.l deleted file mode 100644 index e9c4ca2..0000000 --- a/trigraph.l +++ /dev/null @@ -1,44 +0,0 @@ -%{ -#include -#include "trigraph.h" - -#define yylex tglex - -static FILE *tgout; -static void replace_trigraph(int c); -static size_t tgline = 0; -static size_t tgchar = 0; -%} - -%% - -\?\?[=\/'\(\)!<>\-?] { replace_trigraph(yytext[2]); } -\n { tgline++; tgchar = 0; fputc(yytext[0], tgout); } -. { tgchar++; fputc(yytext[0], tgout); } - -%% - -static void replace_trigraph(int c) -{ - static int tg[] = { - ['='] = '#', - ['/'] = '\\', - ['\''] = '^', - ['('] = '[', - [')'] = ']', - ['!'] = '|', - ['<'] = '>', - ['-'] = '~', - ['?'] = '?', - }; - fprintf(stderr, "warning: replaced trigraph '??%c' with '%c' at __FILE__:%zd:%zd\n", - c, tg[c], /*__FILE__,*/ tgline, tgchar); - fputc(tg[c], tgout); -} - -int trigraph(FILE *in, FILE *out) -{ - yyin = in; - tgout = out; - return tglex(); -} -- cgit v1.2.1