compiler

Unnamed repository; edit this file 'description' to name the repository.
git clone https://git.deepztream.com/compiler
Log | Files | Refs

commit f2dc46ee6498b0d3cf6b129e493e16666b147c21
Author: William Djupström <william@deepztream.com>
Date:   Mon, 25 Feb 2019 23:31:32 +0000

Initial commit

Diffstat:
AMakefile | 11+++++++++++
Aexamples/example.1 | 7+++++++
Aexamples/example.2 | 9+++++++++
Alex.l | 46++++++++++++++++++++++++++++++++++++++++++++++
Aparse.y | 557+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
5 files changed, 630 insertions(+), 0 deletions(-)

diff --git a/Makefile b/Makefile @@ -0,0 +1,11 @@ +compiler: parse.c lex.c + gcc -g -o $@ parse.c lex.c + +parse.c: parse.y + bison --defines=parse.tab.h --report=all -o $@ $< + +lex.c: lex.l + flex --header-file=lex.h -o $@ $< + +clean: + rm -f *.o parse.tab.h lex.h compiler parse.c lex.c parse.output diff --git a/examples/example.1 b/examples/example.1 @@ -0,0 +1,7 @@ +_start() { + decl a,b,c; + a = 5; + b = 30; + c = a + b * 2; + return c; +} diff --git a/examples/example.2 b/examples/example.2 @@ -0,0 +1,9 @@ +exp(b, e) { + decl r = 1; + if (!e) return 1; + while (e > 1) { + if (!(e % 2)) b = b * b, e = e >> 1; + if (e % 2) e = e - 1, r = r * b; + } + return b * r; +} diff --git a/lex.l b/lex.l @@ -0,0 +1,46 @@ +%{ +#include "tokens.h" +#include "parse.tab.h" +extern int line; +%} + +%% +"return" {return RETURN;} +"if" {return IF;} +"else" {return ELSE;} +"while" {return WHILE;} +"decl" {return DECL;} + +"++" {return INC;} +"--" {return DEC;} +"+=" {return ADD_EQ;} +"-=" {return SUB_EQ;} +"*=" {return MUL_EQ;} +"/=" {return DIV_EQ;} +"%=" {return MOD_EQ;} +"&=" {return AND_EQ;} +"|=" {return OR_EQ;} +"^=" {return XOR_EQ;} +"<<=" {return SHL_EQ;} +">>=" {return SHR_EQ;} +"==" {return EQ;} +"!=" {return NOT_EQ;} +"<=" {return GTEQ;} +">=" {return LTEQ;} +"&&" {return AND;} +"||" {return OR;} +"<<" {return SHIFT_L;} +">>" {return SHIFT_R;} + +[0-9.]+ {yylval.i = atol(yytext); return NUMBER;} +[a-zA-Z_][a-zA-Z0-9_]* {yylval.str = calloc(strlen(yytext)+1, 1); strcpy(yylval.str, yytext); return IDENTIFIER;} +\"[^\"]*\" {yytext[strlen(yytext) - 1] = 0; yylval.str = calloc(strlen(yytext), 1); strcpy(yylval.str, yytext+1); return STRING;} +[ \t]+ {;} +"\r\n" {line++;} +[\r\n] {line++;} +. {return *yytext;} +%% +/* +. {return *yytext;} +%% +*/ diff --git a/parse.y b/parse.y @@ -0,0 +1,557 @@ +%define parse.error verbose + +%{ +#include "lex.h" +int line = 1; +char *fname; +int yywrap(void){ return 1; } +int yylex(void); +void yyerror(char *s) +{ + fprintf(stderr, "%s:%i:%s\n", fname, line, s); + exit(1); +} + +#define IDENTIFIER_TYPES( f ) \ + f(undefined) \ + f(variable) \ + f(function) \ + f(parameter) + +enum id_type { + #define f(n) n, + IDENTIFIER_TYPES(f) + #undef f +}; + +#define EXPRESSION_TYPES( f ) \ + f(nop) f(string) f(number) f(idnt) \ + f(add) \ + f(neg) \ + f(l_not) \ + f(eq) \ + f(lt) \ + f(comma) \ + f(fcall) \ + f(ref) \ + f(deref) \ + f(mul) \ + f(divide) \ + f(l_and) \ + f(b_and) \ + f(l_or) \ + f(b_or) \ + f(b_xor) \ + f(mod) \ + f(b_not) \ + f(set) \ + f(decl) \ + f(shift_l) \ + f(shift_r) \ + f(ifnz) \ + f(ret) \ + f(loop) + +enum e_type { + #define f(n) n, + EXPRESSION_TYPES(f) + #undef f +}; + +struct expression { + enum e_type type; + enum id_type id_type; + int constant; + int declares; + union { + char *str; + int64_t i; + }; + size_t args_count; + struct expression *args; +}; + +#define f(n) \ + inline struct expression *e_##n() {} + +struct identifiers{ + size_t count; + struct expression *identifiers; +}; +struct identifiers *ids = NULL; +struct expression *defid(char *name) +{ + if (!ids) + ids = calloc(sizeof(struct identifiers), 1); + ids->count++; + ids->identifiers = realloc(ids->identifiers, sizeof(struct expression) * ids->count); + + ids->identifiers[ids->count-1].type = idnt; + ids->identifiers[ids->count-1].id_type = undefined; + ids->identifiers[ids->count-1].constant = 1; + ids->identifiers[ids->count-1].str = malloc(strlen(name) + 1); + + strcpy(ids->identifiers[ids->count-1].str, name); + free(name); + + return &ids->identifiers[ids->count-1]; +} + +struct expression *find_idnt(char *name) +{ + struct expression *r = NULL; + size_t i; + for (i = 0; i < ids->count; i++) { + if (!strcmp(ids->identifiers[i].str, name)) { + r = &ids->identifiers[i]; + break; + } + } + if (!r) yyerror("undefined symbol"); + return r; +} + +struct expression *ZERO = &(struct expression){ .type = number, .constant = 1 }; +struct expression *final; + +struct expression *e(enum e_type type, struct expression *arg1, struct expression *arg2, int nofree) +{ + struct expression *r = calloc(sizeof(struct expression), 1); + r->type = type; + if (r->type == comma && (arg1 && arg1->type == comma || arg2 && arg2->type == comma)) { + if (arg1 && arg1->type == comma && arg2 && arg2->type == comma) { + r->args_count = arg1->args_count + arg2->args_count; + r->args = calloc(sizeof(struct expression), r->args_count); + memmove(r->args, arg1->args, sizeof(struct expression) * arg1->args_count); + memmove(&r->args[arg1->args_count], arg2->args, sizeof(struct expression) * arg2->args_count); + } else if (arg1 && arg1->type == comma) { + r->args_count = arg1->args_count + 1; + r->args = calloc(sizeof(struct expression), r->args_count); + memmove(r->args, arg1->args, sizeof(struct expression) * arg1->args_count); + if (arg2) memmove(&r->args[arg1->args_count], arg2, sizeof(struct expression)); + } else if (arg2 && arg2->type == comma) { + r->args_count = arg2->args_count + 1; + r->args = calloc(sizeof(struct expression), r->args_count); + if (arg1) memmove(r->args, arg1, sizeof(struct expression)); + memmove(&r->args[arg1 ? 1 : 0], arg2->args, sizeof(struct expression) * arg2->args_count); + } + } else if (r->type == number) { + r->i = (int64_t) arg1; + } else if (r->type == string && arg1) { + r->str = calloc(strlen((char *)arg1) + 1, 1); + strcpy(r->str, (char *)arg1); + } else { + r->args_count = (arg1 ? 1 : 0) + (arg2 ? 1 : 0); + r->args = calloc(sizeof(struct expression), r->args_count); + if (arg1) memmove(r->args, arg1, sizeof(struct expression)); + if (arg2) memmove(&r->args[arg1 ? 1 : 0], arg2, sizeof(struct expression)); + } + if (!nofree && arg1 && !arg1->constant) free(arg1); + if (!nofree && arg2 && !arg2->constant) free(arg2); + return r; +} +int curr_decl = 0; +struct expression *defun(struct expression *identifier, struct expression *params, struct expression *body) +{ + struct expression *r = calloc(sizeof(struct expression), 1); + r->type = fcall; + r->declares = curr_decl; + curr_decl = 0; + r->args_count = params ? 3 : 2; + r->args = calloc(sizeof(struct expression), r->args_count); + memmove(r->args, identifier, sizeof(struct expression)); + if (params) memmove(&r->args[1], params, sizeof(struct expression)); + memmove(&r->args[r->args_count-1], body, sizeof(struct expression)); + if (params) free(params); + if (body) free(body); + return r; +} + +struct expression *move(struct expression *e) +{ + struct expression *r = calloc(sizeof(struct expression), 1); + memmove(r, e, sizeof(struct expression)); + free(e); + return r; +} +struct expression *copy(struct expression *e) +{ + struct expression *r = calloc(sizeof(struct expression), 1); + memcpy(r, e, sizeof(struct expression)); + return r; +} +%} + +%union {struct expression *expr; char *str; int64_t i;} + +%token END 0 +%token <str> IDENTIFIER STRING +%token <i> NUMBER +%token INC "++" DEC "--" +%token EQ "==" LTEQ "<=" GTEQ ">=" AND "&&" OR "||" NOT_EQ "!=" +%token SHIFT_L "<<" SHIFT_R ">>" +%token ADD_EQ "+=" SUB_EQ "-=" MUL_EQ "*=" DIV_EQ "/=" MOD_EQ "%=" +%token OR_EQ "|=" AND_EQ "&=" XOR_EQ "^=" SHL_EQ "<<=" SHR_EQ ">>=" +%token RETURN "return" IF "if" ELSE "else" WHILE "while" DECL "decl" + +%left ',' +%right '?' ':' '=' "+=" "-=" "*=" "/=" "%=" "&=" "|=" "^=" "<<=" ">>=" +%left "||" +%left "&&" +%left '|' +%left '^' +%left '&' +%left "==" "!=" +%left '<' '>' "<=" ">=" +%left "<<" ">>" +%left '+' '-' +%left '*' '/' '%' +%right REF NEG "++" "--" '!' '~' +%left '(' '[' + +%type <expr> expression exprs c_expr var_def var_defs statement comp_stmt parameters parameter identifier +%type <str> str + +%% + +program: program function +| function +; + +function: identifier '(' parameters ')' statement {$1->id_type = function; final = defun(copy($1), $3, $5);} +; + +parameters: parameter {$$ = move($1);} +| %empty {$$ = NULL;} +; + +parameter: parameter ',' parameter {$$ = e(comma, $1, $3, 0);} +| identifier {$1->id_type = parameter; $$ = copy($1);} +; + +statement: "return" exprs ';' {$$ = e(ret, $2, NULL, 0);} +| "while" '(' expression ')' statement {$$ = e(loop, $3, $5, 0);} +| "if" '(' expression ')' statement {$$ = e(ifnz, $3, $5, 0);} +| "else" statement {$$ = move($2);} +| exprs ';' {$$ = move($1);} +| comp_stmt '}' {$$ = move($1);} +| ';' {$$ = e(nop, NULL, NULL, 1);} +; + +comp_stmt: '{' {$$ = e(comma, NULL, NULL, 1);} +| comp_stmt statement {$$ = e(comma, $1, $2, 0);} +; + +var_defs: "decl" var_def {$$ = e(comma, $2, NULL, 0);} +| var_defs ',' var_def {$$ = e(comma, $1, $3, 0);} +; + +var_def: identifier {curr_decl++; $1->id_type = variable; $$ = e(decl, $1, NULL, 0);} +| identifier '=' expression {curr_decl++; $1->id_type = variable; $$ = e(comma, e(decl, $1, NULL, 0), e(set, $1, $3, 0), 0);} +; + +identifier: IDENTIFIER {$$ = copy(defid($1));} +; + +exprs: c_expr {$$ = move($1);} +| var_defs {$$ = move($1);} +; + +c_expr: expression {$$ = move($1);} +| c_expr ',' expression {$$ = e(comma, $1, $3, 0);} +; + +expression: IDENTIFIER {$$ = copy(find_idnt($1));} +| NUMBER {$$ = e(number, $1, NULL, 1);} +| str {$$ = e(string, $1, NULL, 0);} +| expression '[' expression ']' {printf("uh-oh\n");$$ = NULL;} +| '(' exprs ')' {$$ = move($2);} +| expression '(' ')' {printf("uh-oh\n");$$ = NULL;} +| expression '(' c_expr ')' {printf("uh-oh\n");$$ = NULL;} +| expression '+' expression {$$ = e(add, $1, $3, 0);} +| expression '-' expression {$$ = e(add, $1, e(neg, $3, NULL, 0), 0);} +| expression '*' expression {$$ = e(mul, $1, $3, 0);} +| expression '/' expression {printf("uh-oh\n");$$ = NULL;} +| expression '%' expression {$$ = e(mod, $1, $3, 0);} +| expression '&' expression {printf("uh-oh\n");$$ = NULL;} +| expression '|' expression {printf("uh-oh\n");$$ = NULL;} +| expression '^' expression {printf("uh-oh\n");$$ = NULL;} +| expression "<<" expression {$$ = e(shift_l, $1, $3, 0);} +| expression ">>" expression {$$ = e(shift_r, $1, $3, 0);} +| expression '=' expression {$$ = e(set, $1, $3, 0);} +| expression "+=" expression {printf("uh-oh\n");$$ = NULL;} +| expression "-=" expression {printf("uh-oh\n");$$ = NULL;} +| expression "*=" expression {printf("uh-oh\n");$$ = NULL;} +| expression "/=" expression {printf("uh-oh\n");$$ = NULL;} +| expression "%=" expression {printf("uh-oh\n");$$ = NULL;} +| expression "&=" expression {printf("uh-oh\n");$$ = NULL;} +| expression "|=" expression {printf("uh-oh\n");$$ = NULL;} +| expression "^=" expression {printf("uh-oh\n");$$ = NULL;} +| expression "<<=" expression {printf("uh-oh\n");$$ = NULL;} +| expression ">>=" expression {printf("uh-oh\n");$$ = NULL;} +| expression "==" expression {printf("uh-oh\n");$$ = NULL;} +| expression "!=" expression {printf("uh-oh\n");$$ = NULL;} +| expression '<' expression {$$ = e(lt, $1, $3, 0);} +| expression '>' expression {$$ = e(lt, $3, $1, 0);} +| expression "<=" expression {printf("uh-oh\n");$$ = NULL;} +| expression ">=" expression {printf("uh-oh\n");$$ = NULL;} +| '!' expression {$$ = e(l_not, $2, NULL, 0);} +| '~' expression {printf("uh-oh\n");$$ = NULL;} +| '*' expression %prec REF {printf("uh-oh\n");$$ = NULL;} +| '&' expression %prec REF {printf("uh-oh\n");$$ = NULL;} +| '-' expression %prec NEG {printf("uh-oh\n");$$ = NULL;} +| '+' expression %prec NEG {printf("uh-oh\n");$$ = NULL;} +| "++" expression {printf("uh-oh\n");$$ = NULL;} +| "--" expression {printf("uh-oh\n");$$ = NULL;} +| expression "++" {printf("uh-oh\n");$$ = NULL;} +| expression "--" {printf("uh-oh\n");$$ = NULL;} +; + +str: str STRING {$$ = calloc(strlen($1) + strlen($2) + 1, 1); strcpy($$, $1); strcpy(&$$[strlen($$)], $2); free($1); free($2);} +| STRING {$$ = $1;} +; + +%% +#include <stdio.h> + +char *exp_str[] = { +#define f(e) #e, +EXPRESSION_TYPES(f) +#undef f +}; + +void stringify(struct expression *e, int p_indent, int last, char *name, uint64_t idmap) +{ + int i, indent = p_indent + 3; + if (last) idmap |= (1 << p_indent/3); + for (i = 0; i < p_indent; i+=3) printf("%s ", (idmap & (1 << (i/3))) ? " " : "│"); + if (p_indent >= 0) printf("%s━ ", last ? "┕": "┝"); + switch(e->type) { + case string: + printf("%s: \"%s\"\n", name ? name : "string", e->str); + break; + case idnt: + printf("%s: %s\n", name ? name : "ident", e->str); + break; + case number: + printf("%s: %li\n", name ? name : "number", e->i); + break; + default: + printf("%s:\n", exp_str[e->type]); + for (i = 0; i < e->args_count;) { + stringify(&e->args[i++], indent, i+1 == e->args_count, NULL, idmap); + } + break; + } +} + +volatile void breakpoint(void) { } + +struct storage { + int type; + char *var; + char rbp; +}; +struct storage regs[1024]; +int next_reg; +uint16_t used_regs = 0; +int next_rbp = 8; + +char mod_rm(char in) +{ + +} + +int compile_tree(FILE *output, struct expression *tree) +{ + int i, j, k, reg = -1; + char out[1024]; + switch (tree->type) { + case fcall: + out[0] = 0x55; + out[1] = 0x48; + out[2] = 0x89; + out[3] = 0xE5; + + out[4] = 0x48; + out[5] = 0x81; + out[6] = 0xEC; + *(int *) &out[7] = tree->declares*8; + write(1, out, 11); + compile_tree(output, &tree->args[tree->args_count-1]); + out[0] = 0xc9; + out[1] = 0xc3; + write(1, out, 2); + break; + case comma: + for (i = 0; i < tree->args_count; i++) { + j = compile_tree(output, &tree->args[i]); + } + reg = j; + break; + case add: + j = compile_tree(output, tree->args); + k = compile_tree(output, &tree->args[1]); + for (i = 0; i < 16; i++) { + if (!(used_regs & (1 << i))) { + reg = i; + used_regs |= (1 << reg); + break; + } + } + if (reg < 0) { + fprintf(stderr, "Out of registers?!? fix me\n"); + exit(1); + } + out[0] = 0x48; + if (regs[reg].type == 1) { + out[1] = 0x01; + out[2] = 0x40 + j + 8*k; + //write(1, out, 3); + } + out[0] = 0x48; + out[1] = 0x8b; + if (tree->args->type == idnt) { + out[2] = 0x45 + reg*8; + out[3] = -regs[j].rbp; + out[4] = 0x48; + out[5] = 0x03; + if (tree->args[1].type == idnt) { + out[6] = 0x45 + reg*8; + out[7] = -regs[k].rbp; + write(1, out, 8); + } else { + out[6] = 0xC0 + reg*8 + k; + write(1, out, 7); + } + } else { + } + //used_regs &= ~(1 << j); + used_regs &= ~(1 << k); + //printf("ADD R%i R%i R%i\n", reg, j, k); + break; + case mul: + j = compile_tree(output, tree->args); + k = compile_tree(output, &tree->args[1]); + for (i = 0; i < 16; i++) { + if (!(used_regs & (1 << i))) { + reg = i; + used_regs |= (1 << reg); + break; + } + } + if (reg < 0) { + fprintf(stderr, "Out of registers?!? fix me\n"); + exit(1); + } + out[0] = 0x48; + out[1] = 0x8b; + if (tree->args->type == idnt) { + out[2] = 0x45 + reg*8; + out[3] = -regs[j].rbp; + out[4] = 0x48; + out[5] = 0x0F; + out[6] = 0xAF; + if (tree->args[1].type == idnt) { + out[7] = 0x45 + reg*8; + out[8] = -regs[k].rbp; + write(1, out, 9); + } else { + out[7] = 0xC0 + reg*8 + k; + write(1, out, 8); + } + } else { + } + used_regs &= ~(1 << j); + used_regs &= ~(1 << k); + //printf("MUL R%i R%i R%i\n", reg, j, k); + break; + case decl: + reg = next_reg++; + regs[reg].type = 1; + regs[reg].rbp = next_rbp; + next_rbp += 8; + regs[reg].var = tree->args->str; + //printf("INIT R%i ; %s\n", reg, regs[reg]); + break; + case set: + reg = compile_tree(output, tree->args); + j = compile_tree(output, &tree->args[1]); + used_regs &= ~(1 << j); + out[0] = 0x48; + if (regs[reg].type == 1) { + out[1] = 0x89; + out[2] = 0x45 + 8*j; + out[3] = -regs[reg].rbp; + write(1, out, 4); + } + //printf("STR R%i R%i\n", reg, j); + break; + case idnt: + for (i = 0; i < 1024; i++) { + if (regs[i].type != 1) continue; + if (!strcmp(regs[i].var, tree->str)) break; + } + if (i == 1024) break; + reg = i; + break; + case number: + for (i = 0; i < 16; i++) { + if (!(used_regs & (1 << i))) { + reg = i; + used_regs |= (1 << reg); + break; + } + } + if (reg < 0) { + fprintf(stderr, "Out of registers?!? fix me\n"); + exit(1); + } + out[0] = 0x48 | (reg > 7 ? 1 : 0); + out[1] = 0xb8 + (reg % 8); + *(int64_t *) &out[2] = tree->i; + write(1, out, 10); + //printf("IMME R%i %li\n", reg, tree->i); + break; + case string: + reg = next_reg++; + //printf("IMME R%i %li\n", reg, tree->str); + break; + case ret: + reg = compile_tree(output, tree->args); + out[0] = 0x48; + if (regs[reg].type == 1) { + out[1] = 0x8b; + out[2] = 0x45; + out[3] = -regs[reg].rbp; + write(1, out, 4); + } + /* + out[3] = 0x48; + out[4] = 0x48; + out[5] = 0x48; + out[6] = 0x48; + out[7] = 0x48; + out[8] = 0x48; + out[9] = 0x48; + out[10] = 0x48; + out[0] = 0x48; + out[0] = 0x48; + out[0] = 0x48; + */ + } + return reg; +} + +int main(int argc, char **argv) +{ + if (argc == 1) fname = "stdin"; + else { + fname = argv[1]; + yyin = fopen(fname, "r"); + } + yyparse(); + breakpoint(); + //stringify(final, -3, 0, NULL, 0); + compile_tree(stdout, final); +}