commit f2dc46ee6498b0d3cf6b129e493e16666b147c21
Author: William Djupström <william@deepztream.com>
Date: Mon, 25 Feb 2019 23:31:32 +0000
Initial commit
Diffstat:
5 files changed, 630 insertions(+), 0 deletions(-)
diff --git a/Makefile b/Makefile
@@ -0,0 +1,11 @@
+compiler: parse.c lex.c
+ gcc -g -o $@ parse.c lex.c
+
+parse.c: parse.y
+ bison --defines=parse.tab.h --report=all -o $@ $<
+
+lex.c: lex.l
+ flex --header-file=lex.h -o $@ $<
+
+clean:
+ rm -f *.o parse.tab.h lex.h compiler parse.c lex.c parse.output
diff --git a/examples/example.1 b/examples/example.1
@@ -0,0 +1,7 @@
+_start() {
+ decl a,b,c;
+ a = 5;
+ b = 30;
+ c = a + b * 2;
+ return c;
+}
diff --git a/examples/example.2 b/examples/example.2
@@ -0,0 +1,9 @@
+exp(b, e) {
+ decl r = 1;
+ if (!e) return 1;
+ while (e > 1) {
+ if (!(e % 2)) b = b * b, e = e >> 1;
+ if (e % 2) e = e - 1, r = r * b;
+ }
+ return b * r;
+}
diff --git a/lex.l b/lex.l
@@ -0,0 +1,46 @@
+%{
+#include "tokens.h"
+#include "parse.tab.h"
+extern int line;
+%}
+
+%%
+"return" {return RETURN;}
+"if" {return IF;}
+"else" {return ELSE;}
+"while" {return WHILE;}
+"decl" {return DECL;}
+
+"++" {return INC;}
+"--" {return DEC;}
+"+=" {return ADD_EQ;}
+"-=" {return SUB_EQ;}
+"*=" {return MUL_EQ;}
+"/=" {return DIV_EQ;}
+"%=" {return MOD_EQ;}
+"&=" {return AND_EQ;}
+"|=" {return OR_EQ;}
+"^=" {return XOR_EQ;}
+"<<=" {return SHL_EQ;}
+">>=" {return SHR_EQ;}
+"==" {return EQ;}
+"!=" {return NOT_EQ;}
+"<=" {return GTEQ;}
+">=" {return LTEQ;}
+"&&" {return AND;}
+"||" {return OR;}
+"<<" {return SHIFT_L;}
+">>" {return SHIFT_R;}
+
+[0-9.]+ {yylval.i = atol(yytext); return NUMBER;}
+[a-zA-Z_][a-zA-Z0-9_]* {yylval.str = calloc(strlen(yytext)+1, 1); strcpy(yylval.str, yytext); return IDENTIFIER;}
+\"[^\"]*\" {yytext[strlen(yytext) - 1] = 0; yylval.str = calloc(strlen(yytext), 1); strcpy(yylval.str, yytext+1); return STRING;}
+[ \t]+ {;}
+"\r\n" {line++;}
+[\r\n] {line++;}
+. {return *yytext;}
+%%
+/*
+. {return *yytext;}
+%%
+*/
diff --git a/parse.y b/parse.y
@@ -0,0 +1,557 @@
+%define parse.error verbose
+
+%{
+#include "lex.h"
+int line = 1;
+char *fname;
+int yywrap(void){ return 1; }
+int yylex(void);
+void yyerror(char *s)
+{
+ fprintf(stderr, "%s:%i:%s\n", fname, line, s);
+ exit(1);
+}
+
+#define IDENTIFIER_TYPES( f ) \
+ f(undefined) \
+ f(variable) \
+ f(function) \
+ f(parameter)
+
+enum id_type {
+ #define f(n) n,
+ IDENTIFIER_TYPES(f)
+ #undef f
+};
+
+#define EXPRESSION_TYPES( f ) \
+ f(nop) f(string) f(number) f(idnt) \
+ f(add) \
+ f(neg) \
+ f(l_not) \
+ f(eq) \
+ f(lt) \
+ f(comma) \
+ f(fcall) \
+ f(ref) \
+ f(deref) \
+ f(mul) \
+ f(divide) \
+ f(l_and) \
+ f(b_and) \
+ f(l_or) \
+ f(b_or) \
+ f(b_xor) \
+ f(mod) \
+ f(b_not) \
+ f(set) \
+ f(decl) \
+ f(shift_l) \
+ f(shift_r) \
+ f(ifnz) \
+ f(ret) \
+ f(loop)
+
+enum e_type {
+ #define f(n) n,
+ EXPRESSION_TYPES(f)
+ #undef f
+};
+
+struct expression {
+ enum e_type type;
+ enum id_type id_type;
+ int constant;
+ int declares;
+ union {
+ char *str;
+ int64_t i;
+ };
+ size_t args_count;
+ struct expression *args;
+};
+
+#define f(n) \
+ inline struct expression *e_##n() {}
+
+struct identifiers{
+ size_t count;
+ struct expression *identifiers;
+};
+struct identifiers *ids = NULL;
+struct expression *defid(char *name)
+{
+ if (!ids)
+ ids = calloc(sizeof(struct identifiers), 1);
+ ids->count++;
+ ids->identifiers = realloc(ids->identifiers, sizeof(struct expression) * ids->count);
+
+ ids->identifiers[ids->count-1].type = idnt;
+ ids->identifiers[ids->count-1].id_type = undefined;
+ ids->identifiers[ids->count-1].constant = 1;
+ ids->identifiers[ids->count-1].str = malloc(strlen(name) + 1);
+
+ strcpy(ids->identifiers[ids->count-1].str, name);
+ free(name);
+
+ return &ids->identifiers[ids->count-1];
+}
+
+struct expression *find_idnt(char *name)
+{
+ struct expression *r = NULL;
+ size_t i;
+ for (i = 0; i < ids->count; i++) {
+ if (!strcmp(ids->identifiers[i].str, name)) {
+ r = &ids->identifiers[i];
+ break;
+ }
+ }
+ if (!r) yyerror("undefined symbol");
+ return r;
+}
+
+struct expression *ZERO = &(struct expression){ .type = number, .constant = 1 };
+struct expression *final;
+
+struct expression *e(enum e_type type, struct expression *arg1, struct expression *arg2, int nofree)
+{
+ struct expression *r = calloc(sizeof(struct expression), 1);
+ r->type = type;
+ if (r->type == comma && (arg1 && arg1->type == comma || arg2 && arg2->type == comma)) {
+ if (arg1 && arg1->type == comma && arg2 && arg2->type == comma) {
+ r->args_count = arg1->args_count + arg2->args_count;
+ r->args = calloc(sizeof(struct expression), r->args_count);
+ memmove(r->args, arg1->args, sizeof(struct expression) * arg1->args_count);
+ memmove(&r->args[arg1->args_count], arg2->args, sizeof(struct expression) * arg2->args_count);
+ } else if (arg1 && arg1->type == comma) {
+ r->args_count = arg1->args_count + 1;
+ r->args = calloc(sizeof(struct expression), r->args_count);
+ memmove(r->args, arg1->args, sizeof(struct expression) * arg1->args_count);
+ if (arg2) memmove(&r->args[arg1->args_count], arg2, sizeof(struct expression));
+ } else if (arg2 && arg2->type == comma) {
+ r->args_count = arg2->args_count + 1;
+ r->args = calloc(sizeof(struct expression), r->args_count);
+ if (arg1) memmove(r->args, arg1, sizeof(struct expression));
+ memmove(&r->args[arg1 ? 1 : 0], arg2->args, sizeof(struct expression) * arg2->args_count);
+ }
+ } else if (r->type == number) {
+ r->i = (int64_t) arg1;
+ } else if (r->type == string && arg1) {
+ r->str = calloc(strlen((char *)arg1) + 1, 1);
+ strcpy(r->str, (char *)arg1);
+ } else {
+ r->args_count = (arg1 ? 1 : 0) + (arg2 ? 1 : 0);
+ r->args = calloc(sizeof(struct expression), r->args_count);
+ if (arg1) memmove(r->args, arg1, sizeof(struct expression));
+ if (arg2) memmove(&r->args[arg1 ? 1 : 0], arg2, sizeof(struct expression));
+ }
+ if (!nofree && arg1 && !arg1->constant) free(arg1);
+ if (!nofree && arg2 && !arg2->constant) free(arg2);
+ return r;
+}
+int curr_decl = 0;
+struct expression *defun(struct expression *identifier, struct expression *params, struct expression *body)
+{
+ struct expression *r = calloc(sizeof(struct expression), 1);
+ r->type = fcall;
+ r->declares = curr_decl;
+ curr_decl = 0;
+ r->args_count = params ? 3 : 2;
+ r->args = calloc(sizeof(struct expression), r->args_count);
+ memmove(r->args, identifier, sizeof(struct expression));
+ if (params) memmove(&r->args[1], params, sizeof(struct expression));
+ memmove(&r->args[r->args_count-1], body, sizeof(struct expression));
+ if (params) free(params);
+ if (body) free(body);
+ return r;
+}
+
+struct expression *move(struct expression *e)
+{
+ struct expression *r = calloc(sizeof(struct expression), 1);
+ memmove(r, e, sizeof(struct expression));
+ free(e);
+ return r;
+}
+struct expression *copy(struct expression *e)
+{
+ struct expression *r = calloc(sizeof(struct expression), 1);
+ memcpy(r, e, sizeof(struct expression));
+ return r;
+}
+%}
+
+%union {struct expression *expr; char *str; int64_t i;}
+
+%token END 0
+%token <str> IDENTIFIER STRING
+%token <i> NUMBER
+%token INC "++" DEC "--"
+%token EQ "==" LTEQ "<=" GTEQ ">=" AND "&&" OR "||" NOT_EQ "!="
+%token SHIFT_L "<<" SHIFT_R ">>"
+%token ADD_EQ "+=" SUB_EQ "-=" MUL_EQ "*=" DIV_EQ "/=" MOD_EQ "%="
+%token OR_EQ "|=" AND_EQ "&=" XOR_EQ "^=" SHL_EQ "<<=" SHR_EQ ">>="
+%token RETURN "return" IF "if" ELSE "else" WHILE "while" DECL "decl"
+
+%left ','
+%right '?' ':' '=' "+=" "-=" "*=" "/=" "%=" "&=" "|=" "^=" "<<=" ">>="
+%left "||"
+%left "&&"
+%left '|'
+%left '^'
+%left '&'
+%left "==" "!="
+%left '<' '>' "<=" ">="
+%left "<<" ">>"
+%left '+' '-'
+%left '*' '/' '%'
+%right REF NEG "++" "--" '!' '~'
+%left '(' '['
+
+%type <expr> expression exprs c_expr var_def var_defs statement comp_stmt parameters parameter identifier
+%type <str> str
+
+%%
+
+program: program function
+| function
+;
+
+function: identifier '(' parameters ')' statement {$1->id_type = function; final = defun(copy($1), $3, $5);}
+;
+
+parameters: parameter {$$ = move($1);}
+| %empty {$$ = NULL;}
+;
+
+parameter: parameter ',' parameter {$$ = e(comma, $1, $3, 0);}
+| identifier {$1->id_type = parameter; $$ = copy($1);}
+;
+
+statement: "return" exprs ';' {$$ = e(ret, $2, NULL, 0);}
+| "while" '(' expression ')' statement {$$ = e(loop, $3, $5, 0);}
+| "if" '(' expression ')' statement {$$ = e(ifnz, $3, $5, 0);}
+| "else" statement {$$ = move($2);}
+| exprs ';' {$$ = move($1);}
+| comp_stmt '}' {$$ = move($1);}
+| ';' {$$ = e(nop, NULL, NULL, 1);}
+;
+
+comp_stmt: '{' {$$ = e(comma, NULL, NULL, 1);}
+| comp_stmt statement {$$ = e(comma, $1, $2, 0);}
+;
+
+var_defs: "decl" var_def {$$ = e(comma, $2, NULL, 0);}
+| var_defs ',' var_def {$$ = e(comma, $1, $3, 0);}
+;
+
+var_def: identifier {curr_decl++; $1->id_type = variable; $$ = e(decl, $1, NULL, 0);}
+| identifier '=' expression {curr_decl++; $1->id_type = variable; $$ = e(comma, e(decl, $1, NULL, 0), e(set, $1, $3, 0), 0);}
+;
+
+identifier: IDENTIFIER {$$ = copy(defid($1));}
+;
+
+exprs: c_expr {$$ = move($1);}
+| var_defs {$$ = move($1);}
+;
+
+c_expr: expression {$$ = move($1);}
+| c_expr ',' expression {$$ = e(comma, $1, $3, 0);}
+;
+
+expression: IDENTIFIER {$$ = copy(find_idnt($1));}
+| NUMBER {$$ = e(number, $1, NULL, 1);}
+| str {$$ = e(string, $1, NULL, 0);}
+| expression '[' expression ']' {printf("uh-oh\n");$$ = NULL;}
+| '(' exprs ')' {$$ = move($2);}
+| expression '(' ')' {printf("uh-oh\n");$$ = NULL;}
+| expression '(' c_expr ')' {printf("uh-oh\n");$$ = NULL;}
+| expression '+' expression {$$ = e(add, $1, $3, 0);}
+| expression '-' expression {$$ = e(add, $1, e(neg, $3, NULL, 0), 0);}
+| expression '*' expression {$$ = e(mul, $1, $3, 0);}
+| expression '/' expression {printf("uh-oh\n");$$ = NULL;}
+| expression '%' expression {$$ = e(mod, $1, $3, 0);}
+| expression '&' expression {printf("uh-oh\n");$$ = NULL;}
+| expression '|' expression {printf("uh-oh\n");$$ = NULL;}
+| expression '^' expression {printf("uh-oh\n");$$ = NULL;}
+| expression "<<" expression {$$ = e(shift_l, $1, $3, 0);}
+| expression ">>" expression {$$ = e(shift_r, $1, $3, 0);}
+| expression '=' expression {$$ = e(set, $1, $3, 0);}
+| expression "+=" expression {printf("uh-oh\n");$$ = NULL;}
+| expression "-=" expression {printf("uh-oh\n");$$ = NULL;}
+| expression "*=" expression {printf("uh-oh\n");$$ = NULL;}
+| expression "/=" expression {printf("uh-oh\n");$$ = NULL;}
+| expression "%=" expression {printf("uh-oh\n");$$ = NULL;}
+| expression "&=" expression {printf("uh-oh\n");$$ = NULL;}
+| expression "|=" expression {printf("uh-oh\n");$$ = NULL;}
+| expression "^=" expression {printf("uh-oh\n");$$ = NULL;}
+| expression "<<=" expression {printf("uh-oh\n");$$ = NULL;}
+| expression ">>=" expression {printf("uh-oh\n");$$ = NULL;}
+| expression "==" expression {printf("uh-oh\n");$$ = NULL;}
+| expression "!=" expression {printf("uh-oh\n");$$ = NULL;}
+| expression '<' expression {$$ = e(lt, $1, $3, 0);}
+| expression '>' expression {$$ = e(lt, $3, $1, 0);}
+| expression "<=" expression {printf("uh-oh\n");$$ = NULL;}
+| expression ">=" expression {printf("uh-oh\n");$$ = NULL;}
+| '!' expression {$$ = e(l_not, $2, NULL, 0);}
+| '~' expression {printf("uh-oh\n");$$ = NULL;}
+| '*' expression %prec REF {printf("uh-oh\n");$$ = NULL;}
+| '&' expression %prec REF {printf("uh-oh\n");$$ = NULL;}
+| '-' expression %prec NEG {printf("uh-oh\n");$$ = NULL;}
+| '+' expression %prec NEG {printf("uh-oh\n");$$ = NULL;}
+| "++" expression {printf("uh-oh\n");$$ = NULL;}
+| "--" expression {printf("uh-oh\n");$$ = NULL;}
+| expression "++" {printf("uh-oh\n");$$ = NULL;}
+| expression "--" {printf("uh-oh\n");$$ = NULL;}
+;
+
+str: str STRING {$$ = calloc(strlen($1) + strlen($2) + 1, 1); strcpy($$, $1); strcpy(&$$[strlen($$)], $2); free($1); free($2);}
+| STRING {$$ = $1;}
+;
+
+%%
+#include <stdio.h>
+
+char *exp_str[] = {
+#define f(e) #e,
+EXPRESSION_TYPES(f)
+#undef f
+};
+
+void stringify(struct expression *e, int p_indent, int last, char *name, uint64_t idmap)
+{
+ int i, indent = p_indent + 3;
+ if (last) idmap |= (1 << p_indent/3);
+ for (i = 0; i < p_indent; i+=3) printf("%s ", (idmap & (1 << (i/3))) ? " " : "│");
+ if (p_indent >= 0) printf("%s━ ", last ? "┕": "┝");
+ switch(e->type) {
+ case string:
+ printf("%s: \"%s\"\n", name ? name : "string", e->str);
+ break;
+ case idnt:
+ printf("%s: %s\n", name ? name : "ident", e->str);
+ break;
+ case number:
+ printf("%s: %li\n", name ? name : "number", e->i);
+ break;
+ default:
+ printf("%s:\n", exp_str[e->type]);
+ for (i = 0; i < e->args_count;) {
+ stringify(&e->args[i++], indent, i+1 == e->args_count, NULL, idmap);
+ }
+ break;
+ }
+}
+
+volatile void breakpoint(void) { }
+
+struct storage {
+ int type;
+ char *var;
+ char rbp;
+};
+struct storage regs[1024];
+int next_reg;
+uint16_t used_regs = 0;
+int next_rbp = 8;
+
+char mod_rm(char in)
+{
+
+}
+
+int compile_tree(FILE *output, struct expression *tree)
+{
+ int i, j, k, reg = -1;
+ char out[1024];
+ switch (tree->type) {
+ case fcall:
+ out[0] = 0x55;
+ out[1] = 0x48;
+ out[2] = 0x89;
+ out[3] = 0xE5;
+
+ out[4] = 0x48;
+ out[5] = 0x81;
+ out[6] = 0xEC;
+ *(int *) &out[7] = tree->declares*8;
+ write(1, out, 11);
+ compile_tree(output, &tree->args[tree->args_count-1]);
+ out[0] = 0xc9;
+ out[1] = 0xc3;
+ write(1, out, 2);
+ break;
+ case comma:
+ for (i = 0; i < tree->args_count; i++) {
+ j = compile_tree(output, &tree->args[i]);
+ }
+ reg = j;
+ break;
+ case add:
+ j = compile_tree(output, tree->args);
+ k = compile_tree(output, &tree->args[1]);
+ for (i = 0; i < 16; i++) {
+ if (!(used_regs & (1 << i))) {
+ reg = i;
+ used_regs |= (1 << reg);
+ break;
+ }
+ }
+ if (reg < 0) {
+ fprintf(stderr, "Out of registers?!? fix me\n");
+ exit(1);
+ }
+ out[0] = 0x48;
+ if (regs[reg].type == 1) {
+ out[1] = 0x01;
+ out[2] = 0x40 + j + 8*k;
+ //write(1, out, 3);
+ }
+ out[0] = 0x48;
+ out[1] = 0x8b;
+ if (tree->args->type == idnt) {
+ out[2] = 0x45 + reg*8;
+ out[3] = -regs[j].rbp;
+ out[4] = 0x48;
+ out[5] = 0x03;
+ if (tree->args[1].type == idnt) {
+ out[6] = 0x45 + reg*8;
+ out[7] = -regs[k].rbp;
+ write(1, out, 8);
+ } else {
+ out[6] = 0xC0 + reg*8 + k;
+ write(1, out, 7);
+ }
+ } else {
+ }
+ //used_regs &= ~(1 << j);
+ used_regs &= ~(1 << k);
+ //printf("ADD R%i R%i R%i\n", reg, j, k);
+ break;
+ case mul:
+ j = compile_tree(output, tree->args);
+ k = compile_tree(output, &tree->args[1]);
+ for (i = 0; i < 16; i++) {
+ if (!(used_regs & (1 << i))) {
+ reg = i;
+ used_regs |= (1 << reg);
+ break;
+ }
+ }
+ if (reg < 0) {
+ fprintf(stderr, "Out of registers?!? fix me\n");
+ exit(1);
+ }
+ out[0] = 0x48;
+ out[1] = 0x8b;
+ if (tree->args->type == idnt) {
+ out[2] = 0x45 + reg*8;
+ out[3] = -regs[j].rbp;
+ out[4] = 0x48;
+ out[5] = 0x0F;
+ out[6] = 0xAF;
+ if (tree->args[1].type == idnt) {
+ out[7] = 0x45 + reg*8;
+ out[8] = -regs[k].rbp;
+ write(1, out, 9);
+ } else {
+ out[7] = 0xC0 + reg*8 + k;
+ write(1, out, 8);
+ }
+ } else {
+ }
+ used_regs &= ~(1 << j);
+ used_regs &= ~(1 << k);
+ //printf("MUL R%i R%i R%i\n", reg, j, k);
+ break;
+ case decl:
+ reg = next_reg++;
+ regs[reg].type = 1;
+ regs[reg].rbp = next_rbp;
+ next_rbp += 8;
+ regs[reg].var = tree->args->str;
+ //printf("INIT R%i ; %s\n", reg, regs[reg]);
+ break;
+ case set:
+ reg = compile_tree(output, tree->args);
+ j = compile_tree(output, &tree->args[1]);
+ used_regs &= ~(1 << j);
+ out[0] = 0x48;
+ if (regs[reg].type == 1) {
+ out[1] = 0x89;
+ out[2] = 0x45 + 8*j;
+ out[3] = -regs[reg].rbp;
+ write(1, out, 4);
+ }
+ //printf("STR R%i R%i\n", reg, j);
+ break;
+ case idnt:
+ for (i = 0; i < 1024; i++) {
+ if (regs[i].type != 1) continue;
+ if (!strcmp(regs[i].var, tree->str)) break;
+ }
+ if (i == 1024) break;
+ reg = i;
+ break;
+ case number:
+ for (i = 0; i < 16; i++) {
+ if (!(used_regs & (1 << i))) {
+ reg = i;
+ used_regs |= (1 << reg);
+ break;
+ }
+ }
+ if (reg < 0) {
+ fprintf(stderr, "Out of registers?!? fix me\n");
+ exit(1);
+ }
+ out[0] = 0x48 | (reg > 7 ? 1 : 0);
+ out[1] = 0xb8 + (reg % 8);
+ *(int64_t *) &out[2] = tree->i;
+ write(1, out, 10);
+ //printf("IMME R%i %li\n", reg, tree->i);
+ break;
+ case string:
+ reg = next_reg++;
+ //printf("IMME R%i %li\n", reg, tree->str);
+ break;
+ case ret:
+ reg = compile_tree(output, tree->args);
+ out[0] = 0x48;
+ if (regs[reg].type == 1) {
+ out[1] = 0x8b;
+ out[2] = 0x45;
+ out[3] = -regs[reg].rbp;
+ write(1, out, 4);
+ }
+ /*
+ out[3] = 0x48;
+ out[4] = 0x48;
+ out[5] = 0x48;
+ out[6] = 0x48;
+ out[7] = 0x48;
+ out[8] = 0x48;
+ out[9] = 0x48;
+ out[10] = 0x48;
+ out[0] = 0x48;
+ out[0] = 0x48;
+ out[0] = 0x48;
+ */
+ }
+ return reg;
+}
+
+int main(int argc, char **argv)
+{
+ if (argc == 1) fname = "stdin";
+ else {
+ fname = argv[1];
+ yyin = fopen(fname, "r");
+ }
+ yyparse();
+ breakpoint();
+ //stringify(final, -3, 0, NULL, 0);
+ compile_tree(stdout, final);
+}