compiler

Unnamed repository; edit this file 'description' to name the repository.
git clone https://git.deepztream.com/compiler
Log | Files | Refs

commit 992236e47e3d81d906a71fae6d05cce2dc2ef08f
parent f2dc46ee6498b0d3cf6b129e493e16666b147c21
Author: William Djupström <william@deepztream.com>
Date:   Thu, 28 Feb 2019 17:26:19 +0100

Improved assembly coverage

Diffstat:
Mexamples/example.1 | 6+-----
Mexamples/example.2 | 14++++++--------
Aexamples/example.3 | 4++++
Aexamples/example.4 | 13+++++++++++++
Aexamples/example.5 | 3+++
Aexamples/example.6 | 7+++++++
Mlex.l | 4+++-
Mparse.y | 1479++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----------
8 files changed, 1325 insertions(+), 205 deletions(-)

diff --git a/examples/example.1 b/examples/example.1 @@ -1,7 +1,3 @@ _start() { - decl a,b,c; - a = 5; - b = 30; - c = a + b * 2; - return c; + exit 42; } diff --git a/examples/example.2 b/examples/example.2 @@ -1,9 +1,7 @@ -exp(b, e) { - decl r = 1; - if (!e) return 1; - while (e > 1) { - if (!(e % 2)) b = b * b, e = e >> 1; - if (e % 2) e = e - 1, r = r * b; - } - return b * r; +_start() { + decl a,b,c; + a = 5; + b = 30; + c = a + b * 2; + exit; } diff --git a/examples/example.3 b/examples/example.3 @@ -0,0 +1,4 @@ +_start() { + decl a = 5, b = 9; + exit b - a; +} diff --git a/examples/example.4 b/examples/example.4 @@ -0,0 +1,13 @@ +exp(b, e) { + decl r = 1; + if (!e) return 1; + while (e > 1) { + if (!(e % 2)) b = b * b, e = e >> 1; + else e = e - 1, r = r * b; + } + return b * r; +} + +_start() { + exit exp(3, 4); +} diff --git a/examples/example.5 b/examples/example.5 @@ -0,0 +1,3 @@ +_start() { + exit 31 % 3; +} diff --git a/examples/example.6 b/examples/example.6 @@ -0,0 +1,7 @@ +_start() { + decl c = 64; + while (c < 64+26) + putc(c = c + 1); + putc(10); + exit(0); +} diff --git a/lex.l b/lex.l @@ -1,5 +1,4 @@ %{ -#include "tokens.h" #include "parse.tab.h" extern int line; %} @@ -10,6 +9,9 @@ extern int line; "else" {return ELSE;} "while" {return WHILE;} "decl" {return DECL;} +"exit" {return EXIT;} +"putc" {return PUTC;} +"write" {return WRITE;} "++" {return INC;} "--" {return DEC;} diff --git a/parse.y b/parse.y @@ -6,7 +6,7 @@ int line = 1; char *fname; int yywrap(void){ return 1; } int yylex(void); -void yyerror(char *s) +void yyerror(const char *s) { fprintf(stderr, "%s:%i:%s\n", fname, line, s); exit(1); @@ -15,7 +15,7 @@ void yyerror(char *s) #define IDENTIFIER_TYPES( f ) \ f(undefined) \ f(variable) \ - f(function) \ + f(func) \ f(parameter) enum id_type { @@ -26,31 +26,17 @@ enum id_type { #define EXPRESSION_TYPES( f ) \ f(nop) f(string) f(number) f(idnt) \ - f(add) \ - f(neg) \ - f(l_not) \ - f(eq) \ - f(lt) \ + f(neg) f(add) f(mul) f(divide) f(mod)\ f(comma) \ - f(fcall) \ - f(ref) \ - f(deref) \ - f(mul) \ - f(divide) \ - f(l_and) \ - f(b_and) \ - f(l_or) \ - f(b_or) \ - f(b_xor) \ - f(mod) \ - f(b_not) \ - f(set) \ - f(decl) \ - f(shift_l) \ - f(shift_r) \ - f(ifnz) \ - f(ret) \ - f(loop) + f(ref) f(deref) \ + f(l_eq) f(l_lt) \ + f(l_and) f(l_or) f(l_not) \ + f(b_and) f(b_or) f(b_not) f(b_xor) \ + f(shift_l) f(shift_r) \ + f(set) f(decl) \ + f(ifnz) f(loop) \ + f(printchar) f(writestring) \ + f(function) f(fcall) f(ret) f(exit_p) enum e_type { #define f(n) n, @@ -63,17 +49,12 @@ struct expression { enum id_type id_type; int constant; int declares; - union { - char *str; - int64_t i; - }; + char *str; + int64_t i; size_t args_count; struct expression *args; }; -#define f(n) \ - inline struct expression *e_##n() {} - struct identifiers{ size_t count; struct expression *identifiers; @@ -154,7 +135,7 @@ int curr_decl = 0; struct expression *defun(struct expression *identifier, struct expression *params, struct expression *body) { struct expression *r = calloc(sizeof(struct expression), 1); - r->type = fcall; + r->type = function; r->declares = curr_decl; curr_decl = 0; r->args_count = params ? 3 : 2; @@ -167,6 +148,13 @@ struct expression *defun(struct expression *identifier, struct expression *param return r; } +void append(struct expression *base, struct expression *arg) +{ + base->args = realloc(base->args, sizeof(struct expression) * (++base->args_count)); + memmove(&base->args[base->args_count-1], arg, sizeof(struct expression)); + free(arg); +} + struct expression *move(struct expression *e) { struct expression *r = calloc(sizeof(struct expression), 1); @@ -193,6 +181,7 @@ struct expression *copy(struct expression *e) %token ADD_EQ "+=" SUB_EQ "-=" MUL_EQ "*=" DIV_EQ "/=" MOD_EQ "%=" %token OR_EQ "|=" AND_EQ "&=" XOR_EQ "^=" SHL_EQ "<<=" SHR_EQ ">>=" %token RETURN "return" IF "if" ELSE "else" WHILE "while" DECL "decl" +%token EXIT "exit" PUTC "putc" WRITE "write" %left ',' %right '?' ':' '=' "+=" "-=" "*=" "/=" "%=" "&=" "|=" "^=" "<<=" ">>=" @@ -209,16 +198,17 @@ struct expression *copy(struct expression *e) %right REF NEG "++" "--" '!' '~' %left '(' '[' -%type <expr> expression exprs c_expr var_def var_defs statement comp_stmt parameters parameter identifier +%type <expr> expression exprs c_expr var_def var_defs statement comp_stmt if_stmt else_stmt +%type <expr> parameters parameter identifier program function %type <str> str %% -program: program function -| function +program: program function {$$ = move($1); append($$, $2); final = $$;} +| function {$$ = e(comma, $1, NULL, 0); final = $$;} ; -function: identifier '(' parameters ')' statement {$1->id_type = function; final = defun(copy($1), $3, $5);} +function: identifier '(' parameters ')' statement {$1->id_type = function; $$ = defun(copy($1), $3, $5);} ; parameters: parameter {$$ = move($1);} @@ -231,15 +221,23 @@ parameter: parameter ',' parameter {$$ = e(comma, $1, $3, 0);} statement: "return" exprs ';' {$$ = e(ret, $2, NULL, 0);} | "while" '(' expression ')' statement {$$ = e(loop, $3, $5, 0);} -| "if" '(' expression ')' statement {$$ = e(ifnz, $3, $5, 0);} -| "else" statement {$$ = move($2);} +| else_stmt {$$ = move($1);} +| if_stmt {$$ = move($1);} | exprs ';' {$$ = move($1);} +| "exit" ';' {$$ = e(exit_p, ZERO, NULL, 0);} +| "exit" expression ';' {$$ = e(exit_p, $2, NULL, 0);} | comp_stmt '}' {$$ = move($1);} | ';' {$$ = e(nop, NULL, NULL, 1);} ; +else_stmt: if_stmt "else" statement {$$ = move($1); append($$, $3);} +; + +if_stmt: "if" '(' expression ')' statement {$$ = e(ifnz, $3, $5, 0);} +; + comp_stmt: '{' {$$ = e(comma, NULL, NULL, 1);} -| comp_stmt statement {$$ = e(comma, $1, $2, 0);} +| comp_stmt statement {$$ = move($1); append($$, $2);}/*$$ = e(comma, $1, $2, 0);}*/ ; var_defs: "decl" var_def {$$ = e(comma, $2, NULL, 0);} @@ -262,16 +260,18 @@ c_expr: expression {$$ = move($1);} ; expression: IDENTIFIER {$$ = copy(find_idnt($1));} -| NUMBER {$$ = e(number, $1, NULL, 1);} -| str {$$ = e(string, $1, NULL, 0);} +| NUMBER {$$ = e(number, (void *)$1, NULL, 1);} +| str {$$ = e(string, (void *)$1, NULL, 0);} | expression '[' expression ']' {printf("uh-oh\n");$$ = NULL;} | '(' exprs ')' {$$ = move($2);} -| expression '(' ')' {printf("uh-oh\n");$$ = NULL;} -| expression '(' c_expr ')' {printf("uh-oh\n");$$ = NULL;} +| expression '(' ')' {$$ = e(fcall, $1, NULL, 0);} +| expression '(' c_expr ')' {$$ = e(fcall, $1, $3, 0);} +| "putc" '(' expression ')' {$$ = e(printchar, $3, NULL, 0);} +| "write" '(' expression ',' expression ')' {$$ = e(writestring, $3, $5, 0);} | expression '+' expression {$$ = e(add, $1, $3, 0);} | expression '-' expression {$$ = e(add, $1, e(neg, $3, NULL, 0), 0);} | expression '*' expression {$$ = e(mul, $1, $3, 0);} -| expression '/' expression {printf("uh-oh\n");$$ = NULL;} +| expression '/' expression {$$ = e(divide, $1, $3, 0);} | expression '%' expression {$$ = e(mod, $1, $3, 0);} | expression '&' expression {printf("uh-oh\n");$$ = NULL;} | expression '|' expression {printf("uh-oh\n");$$ = NULL;} @@ -291,15 +291,15 @@ expression: IDENTIFIER {$$ = copy(find_idnt($1));} | expression ">>=" expression {printf("uh-oh\n");$$ = NULL;} | expression "==" expression {printf("uh-oh\n");$$ = NULL;} | expression "!=" expression {printf("uh-oh\n");$$ = NULL;} -| expression '<' expression {$$ = e(lt, $1, $3, 0);} -| expression '>' expression {$$ = e(lt, $3, $1, 0);} +| expression '<' expression {$$ = e(l_lt, $1, $3, 0);} +| expression '>' expression {$$ = e(l_lt, $3, $1, 0);} | expression "<=" expression {printf("uh-oh\n");$$ = NULL;} | expression ">=" expression {printf("uh-oh\n");$$ = NULL;} | '!' expression {$$ = e(l_not, $2, NULL, 0);} | '~' expression {printf("uh-oh\n");$$ = NULL;} -| '*' expression %prec REF {printf("uh-oh\n");$$ = NULL;} -| '&' expression %prec REF {printf("uh-oh\n");$$ = NULL;} -| '-' expression %prec NEG {printf("uh-oh\n");$$ = NULL;} +| '*' expression %prec REF {$$ = e(deref, $2, NULL, 0);} +| '&' expression %prec REF {$$ = e(ref, $2, NULL, 0);} +| '-' expression %prec NEG {$$ = e(neg, $2, NULL, 0);} | '+' expression %prec NEG {printf("uh-oh\n");$$ = NULL;} | "++" expression {printf("uh-oh\n");$$ = NULL;} | "--" expression {printf("uh-oh\n");$$ = NULL;} @@ -324,20 +324,20 @@ void stringify(struct expression *e, int p_indent, int last, char *name, uint64_ { int i, indent = p_indent + 3; if (last) idmap |= (1 << p_indent/3); - for (i = 0; i < p_indent; i+=3) printf("%s ", (idmap & (1 << (i/3))) ? " " : "│"); - if (p_indent >= 0) printf("%s━ ", last ? "┕": "┝"); + for (i = 0; i < p_indent; i+=3) fprintf(stderr, "%s ", (idmap & (1 << (i/3))) ? " " : "│"); + if (p_indent >= 0) fprintf(stderr, "%s━ ", last ? "┕": "┝"); switch(e->type) { case string: - printf("%s: \"%s\"\n", name ? name : "string", e->str); + fprintf(stderr, "%s: \"%s\"\n", name ? name : "string", e->str); break; case idnt: - printf("%s: %s\n", name ? name : "ident", e->str); + fprintf(stderr, "%s: %s\n", name ? name : "ident", e->str); break; case number: - printf("%s: %li\n", name ? name : "number", e->i); + fprintf(stderr, "%s: %li\n", name ? name : "number", e->i); break; default: - printf("%s:\n", exp_str[e->type]); + fprintf(stderr, "%s:\n", exp_str[e->type]); for (i = 0; i < e->args_count;) { stringify(&e->args[i++], indent, i+1 == e->args_count, NULL, idmap); } @@ -345,7 +345,7 @@ void stringify(struct expression *e, int p_indent, int last, char *name, uint64_ } } -volatile void breakpoint(void) { } +void breakpoint(void) {} struct storage { int type; @@ -353,118 +353,503 @@ struct storage { char rbp; }; struct storage regs[1024]; +const int registers = 16; int next_reg; -uint16_t used_regs = 0; -int next_rbp = 8; +uint16_t used_regs; +int next_rbp; -char mod_rm(char in) -{ - -} +char machine_code[8192] = { 0 }; +int mc_ptr = 0; +char rodata[8192] = { 0 }; +int rd_ptr = 0; +int entry_addr = -1; +long start_addr, rodata_addr; +int func_declares; + +int arg_order[16] = {7, 6, 2, 1, 8, 9, 0, 10, 11, 12, 13, 14, 15, 3, 5}; +int caller_save = 0b0000000011001111; -int compile_tree(FILE *output, struct expression *tree) +int compile_tree(FILE *output, struct expression *tree, int discard_result, int out_reg) { - int i, j, k, reg = -1; + int i, j, k, l, reg = -1; char out[1024]; + int pushed = 0; + struct expression *tmp; + fprintf(stderr, "Type: %s\n", exp_str[tree->type]); switch (tree->type) { - case fcall: - out[0] = 0x55; - out[1] = 0x48; - out[2] = 0x89; - out[3] = 0xE5; - - out[4] = 0x48; - out[5] = 0x81; - out[6] = 0xEC; - *(int *) &out[7] = tree->declares*8; - write(1, out, 11); - compile_tree(output, &tree->args[tree->args_count-1]); - out[0] = 0xc9; - out[1] = 0xc3; - write(1, out, 2); + case function: + next_reg = registers; + used_regs = (1 << 5) | (1 << 4); + next_rbp = 8; + tree->i = mc_ptr; + find_idnt(tree->args->str)->i = mc_ptr + start_addr; + if (!strcmp(tree->args->str, "_start")) { + entry_addr = mc_ptr; + machine_code[mc_ptr++] = 0x55; + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x89; + machine_code[mc_ptr++] = 0xE5; + + if (tree->declares) { + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x81; + machine_code[mc_ptr++] = 0xEC; + *(int *) &machine_code[mc_ptr] = tree->declares*8; + mc_ptr += 4; + } + } else { + if (tree->args[1].type == idnt && tree->args[1].id_type == parameter) { + tree->declares++; + regs[next_reg].type = 1; + regs[next_reg].var = tree->args[1].str; + regs[next_reg].rbp = next_rbp; + next_rbp += 8; + next_reg++; + } else if (tree->args[1].type == comma && tree->args[1].args->type == idnt && tree->args[1].args->id_type == parameter) { + for (i = 0; i < tree->args[1].args_count; i++) { + tree->declares++; + regs[next_reg].type = 1; + regs[next_reg].var = tree->args[1].args[i].str; + regs[next_reg].rbp = next_rbp; + next_rbp += 8; + next_reg++; + } + } + func_declares = tree->declares; + used_regs |= (1 << 5); + machine_code[mc_ptr++] = 0x55; + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x89; + machine_code[mc_ptr++] = 0xE5; + + if (tree->declares) { + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x81; + machine_code[mc_ptr++] = 0xEC; + *(int *) &machine_code[mc_ptr] = tree->declares*8; + mc_ptr += 4; + } + + if (tree->args[1].type == idnt && tree->args[1].id_type == parameter) { + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x89; + machine_code[mc_ptr++] = 0x45 + 8*arg_order[0]; + machine_code[mc_ptr++] = -regs[registers].rbp; + } else if (tree->args[1].type == comma && tree->args[1].args->type == idnt && tree->args[1].args->id_type == parameter) { + for (i = 0; i < tree->args[1].args_count; i++) { + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x89; + machine_code[mc_ptr++] = 0x45 + 8*arg_order[i]; + machine_code[mc_ptr++] = -regs[registers + i].rbp; + } + } + } + compile_tree(output, &tree->args[tree->args_count-1], 1, 0); + if (strcmp(tree->args->str, "_start")) { + machine_code[mc_ptr++] = 0xc9; + machine_code[mc_ptr++] = 0xc3; + } break; case comma: for (i = 0; i < tree->args_count; i++) { - j = compile_tree(output, &tree->args[i]); + j = compile_tree(output, &tree->args[i], (i+1 == tree->args_count ? discard_result : 1), out_reg); } - reg = j; + if (!discard_result) reg = j; break; - case add: - j = compile_tree(output, tree->args); - k = compile_tree(output, &tree->args[1]); - for (i = 0; i < 16; i++) { - if (!(used_regs & (1 << i))) { - reg = i; - used_regs |= (1 << reg); - break; + case fcall: + for (i = 0; i < registers; i++) { + if ((1 << i) & caller_save && (1 << i) & used_regs) { + machine_code[mc_ptr++] = 0x48 | (i > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x50 + (i % 8); + next_rbp += 8; + pushed |= (1 << i); } } - if (reg < 0) { - fprintf(stderr, "Out of registers?!? fix me\n"); - exit(1); + if (tree->args_count == 2) { + if (tree->args[1].type == comma) { + for (i = 0; i < tree->args[1].args_count; i++) { + j = compile_tree(output, &tree->args[1].args[i], 0, arg_order[i]); + if (j != arg_order[i]) { + if (j >= registers) { + machine_code[mc_ptr++] = 0x48 | (arg_order[i] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0x45 + (arg_order[i] % 8) * 8; + machine_code[mc_ptr++] = -regs[j].rbp; + } else { + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 4 : 0) | (arg_order[i] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x89; + machine_code[mc_ptr++] = 0xC0 + (j % 8) * 8 + arg_order[i]; + used_regs &= ~(1 << j); + } + } + used_regs |= (1 << arg_order[i]); + } + k = i; + } else { + j = compile_tree(output, &tree->args[1], 0, arg_order[0]); + if (j != arg_order[0]) { + used_regs |= (1 << arg_order[0]); + if (j >= registers) { + machine_code[mc_ptr++] = 0x48 | (arg_order[0] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0x45 + (arg_order[0] % 8) * 8; + machine_code[mc_ptr++] = -regs[j].rbp; + } else { + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 4 : 0) | (arg_order[0] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x89; + machine_code[mc_ptr++] = 0xC0 + (j % 8) * 8 + arg_order[0]; + used_regs &= ~(1 << j); + } + } + k = 1; + } + } + if (!(pushed & 1)) { + used_regs |= 1; + } + j = find_idnt(tree->args->str)->i; + machine_code[mc_ptr++] = 0xe8; + *(int32_t *) &machine_code[mc_ptr] = j - start_addr - mc_ptr - 4; + mc_ptr += 4; + + if (!discard_result) reg = 0; + if (pushed & 1 && !discard_result) { + for (i = 0; i < registers; i++) { + if (!(used_regs & (1 << i))) { + reg = i; + used_regs |= (1 << reg); + break; + } + } + if (i == registers) { + fprintf(stderr, "Out of registers?!? fix me\n"); + exit(1); + } + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x89; + machine_code[mc_ptr++] = 0xC0 + (reg % 8); + } + for (i = registers - 1; i >= 0; i--) { + if (pushed & (1 << i)) { + machine_code[mc_ptr++] = 0x48 | (i > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x58 + (i % 8); + next_rbp -= 8; + pushed &= ~(1 << i); + } else { + if (i != reg) used_regs &= ~(1 << i); + } + } + break; + case add: + j = compile_tree(output, tree->args, discard_result, out_reg); + k = compile_tree(output, &tree->args[1], discard_result, -1); + if (discard_result) { + if (j < registers) used_regs &= ~(1 << j); + if (k < registers) used_regs &= ~(1 << k); + break; + } + reg = j; + if (reg >= registers) { + for (i = 0; i < registers; i++) { + if (!(used_regs & (1 << i))) { + reg = i; + used_regs |= (1 << reg); + break; + } + } + if (i == registers) { + fprintf(stderr, "Out of registers?!? fix me\n"); + exit(1); + } } - out[0] = 0x48; - if (regs[reg].type == 1) { - out[1] = 0x01; - out[2] = 0x40 + j + 8*k; - //write(1, out, 3); - } - out[0] = 0x48; - out[1] = 0x8b; if (tree->args->type == idnt) { - out[2] = 0x45 + reg*8; - out[3] = -regs[j].rbp; - out[4] = 0x48; - out[5] = 0x03; - if (tree->args[1].type == idnt) { - out[6] = 0x45 + reg*8; - out[7] = -regs[k].rbp; - write(1, out, 8); + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0x45 + (reg % 8) * 8; + machine_code[mc_ptr++] = -regs[j].rbp; + } + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x03; + if (tree->args[1].type == idnt) { + machine_code[mc_ptr++] = 0x45 + (reg % 8) * 8; + machine_code[mc_ptr++] = -regs[k].rbp; + } else { + machine_code[mc_ptr-2] |= k > 7 ? 1 : 0; + machine_code[mc_ptr++] = 0xC0 + (reg % 8) * 8 + k; + } + if (k < registers) used_regs &= ~(1 << k); + break; + case neg: + j = compile_tree(output, tree->args, discard_result, out_reg); + if (discard_result) { + if (j < registers) used_regs &= ~(1 << j); + break; + } + reg = j; + if (j >= registers) { + for (i = 0; i < registers; i++) { + if (!(used_regs & (1 << i))) { + reg = i; + used_regs |= (1 << reg); + break; + } + } + if (i == registers) { + fprintf(stderr, "Out of registers?!? fix me\n"); + exit(1); + } + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0x45 + reg*8; + machine_code[mc_ptr++] = -regs[j].rbp; + } + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0xf7; + machine_code[mc_ptr++] = 0xd8 + (reg % 8); + break; + case mul: + j = compile_tree(output, tree->args, discard_result, out_reg); + k = compile_tree(output, &tree->args[1], discard_result, -1); + if (discard_result) { + if (j < registers) used_regs &= ~(1 << j); + if (k < registers) used_regs &= ~(1 << k); + break; + } + reg = j; + if ((out_reg == -1 || out_reg >= registers) && j >= registers) { + for (i = 0; i < registers; i++) { + if (!(used_regs & (1 << i))) { + reg = i; + used_regs |= (1 << reg); + break; + } + } + if (i == registers) { + fprintf(stderr, "Out of registers?!? fix me\n"); + exit(1); + } + if (j >= registers) { + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0x45 + reg*8; + machine_code[mc_ptr++] = -regs[j].rbp; } else { - out[6] = 0xC0 + reg*8 + k; - write(1, out, 7); + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0x45 + reg*8; } + } + + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x0F; + machine_code[mc_ptr++] = 0xAF; + if (k >= registers) { + machine_code[mc_ptr++] = 0x45 + reg*8; + machine_code[mc_ptr++] = -regs[k].rbp; } else { + machine_code[mc_ptr++] = 0xC0 + reg*8 + k; } //used_regs &= ~(1 << j); used_regs &= ~(1 << k); - //printf("ADD R%i R%i R%i\n", reg, j, k); break; - case mul: - j = compile_tree(output, tree->args); - k = compile_tree(output, &tree->args[1]); - for (i = 0; i < 16; i++) { - if (!(used_regs & (1 << i))) { - reg = i; - used_regs |= (1 << reg); - break; + case divide: + if (discard_result) { + j = compile_tree(output, tree->args, 1, -1); + k = compile_tree(output, &tree->args[1], 1, -1); + if (j < registers) used_regs &= ~(1 << j); + break; + } + used_regs |= 1 | 4; + j = compile_tree(output, tree->args, 0, -1); + used_regs &= ~(1 | 4); + if (j != 0) { + if (used_regs & 1) { + fprintf(stderr, "Not implemented yet! (rax)\n"); + exit(1); + } else { + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x8b; + if (j >= registers) { + machine_code[mc_ptr++] = 0x45 + (0 % 8) * 8; + machine_code[mc_ptr++] = -regs[j].rbp; + } else { + machine_code[mc_ptr-2] |= (j > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0xc0 + (j % 8); + } + used_regs |= (1 << 0); } + used_regs &= ~(1 << j); } - if (reg < 0) { - fprintf(stderr, "Out of registers?!? fix me\n"); - exit(1); + if (used_regs & 4) { + fprintf(stderr, "Not implemented yet! (rdx)\n"); } - out[0] = 0x48; - out[1] = 0x8b; - if (tree->args->type == idnt) { - out[2] = 0x45 + reg*8; - out[3] = -regs[j].rbp; - out[4] = 0x48; - out[5] = 0x0F; - out[6] = 0xAF; - if (tree->args[1].type == idnt) { - out[7] = 0x45 + reg*8; - out[8] = -regs[k].rbp; - write(1, out, 9); + used_regs |= 1 << 2; + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x99; + + k = compile_tree(output, &tree->args[1], 0, -1); + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0xF7; + if (tree->args[1].type == idnt) { + machine_code[mc_ptr++] = 0x7d; + machine_code[mc_ptr++] = -regs[k].rbp; + } else { + machine_code[mc_ptr-2] |= (k > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0xf8 + (k % 8); + used_regs &= ~(1 << k); + } + reg = 0; + break; + case mod: + if (discard_result) { + j = compile_tree(output, tree->args, 1, -1); + k = compile_tree(output, &tree->args[1], 1, -1); + if (j < registers) used_regs &= ~(1 << j); + break; + } + used_regs |= 1 | 4; + j = compile_tree(output, tree->args, 0, -1); + used_regs &= ~(1 | 4); + if (j != 0) { + if (used_regs & 1) { + fprintf(stderr, "Not implemented yet! (rax)\n"); + exit(1); } else { - out[7] = 0xC0 + reg*8 + k; - write(1, out, 8); + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x8b; + if (j >= registers) { + machine_code[mc_ptr++] = 0x45 + (0 % 8) * 8; + machine_code[mc_ptr++] = -regs[j].rbp; + } else { + machine_code[mc_ptr-2] |= (j > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0xc0 + (j % 8); + } + used_regs |= (1 << 0); } + used_regs &= ~(1 << j); + } + if (used_regs & 4) { + fprintf(stderr, "Not implemented yet! (rdx)\n"); + } + used_regs |= 1 << 0; + used_regs |= 1 << 2; + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x99; + + k = compile_tree(output, &tree->args[1], 0, -1); + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0xF7; + if (k >= registers) { + machine_code[mc_ptr++] = 0x7d; + machine_code[mc_ptr++] = -regs[k].rbp; } else { + machine_code[mc_ptr-2] |= (k > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0xf8 + (k % 8); + used_regs &= ~(1 << k); } - used_regs &= ~(1 << j); - used_regs &= ~(1 << k); - //printf("MUL R%i R%i R%i\n", reg, j, k); + reg = 2; + break; + case shift_l: + j = compile_tree(output, tree->args, discard_result, out_reg); + k = compile_tree(output, &tree->args[1], discard_result, 1); + if (discard_result) { + if (j < registers) used_regs &= ~(1 << j); + if (k < registers) used_regs &= ~(1 << k); + break; + } + if (k != 1) { + if (used_regs & (1 << 1)) { + fprintf(stderr, "Not implemented yet! (rcx)\n"); + exit(1); + } + used_regs |= (1 << 1); + if (k >= registers) { + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0x4d; + machine_code[mc_ptr++] = -regs[k].rbp; + } else { + machine_code[mc_ptr++] = 0x48 | (k > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0xc8 + (k % 8); + used_regs &= ~(1 << k); + } + k = 1; + } + reg = j; + if (reg >= registers) { + for (i = 0; i < registers; i++) { + if (!(used_regs & (1 << i))) { + reg = i; + used_regs |= (1 << reg); + break; + } + } + if (i == registers) { + fprintf(stderr, "Out of registers?!? fix me\n"); + exit(1); + } + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0x45 + reg*8; + machine_code[mc_ptr++] = -regs[j].rbp; + } + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0xd3; + machine_code[mc_ptr++] = 0xe0 + reg; + + if (k < registers) used_regs &= ~(1 << k); + break; + case shift_r: + j = compile_tree(output, tree->args, discard_result, out_reg); + k = compile_tree(output, &tree->args[1], discard_result, 1); + if (discard_result) { + if (j < registers) used_regs &= ~(1 << j); + if (k < registers) used_regs &= ~(1 << k); + break; + } + if (k != 1) { + if (used_regs & (1 << 1)) { + fprintf(stderr, "Not implemented yet! (rcx)\n"); + exit(1); + } + used_regs |= (1 << 1); + if (k >= registers) { + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0x4d; + machine_code[mc_ptr++] = -regs[k].rbp; + } else { + machine_code[mc_ptr++] = 0x48 | (k > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0xc8 + (k % 8); + used_regs &= ~(1 << k); + } + k = 1; + } + reg = j; + if (reg >= registers) { + for (i = 0; i < registers; i++) { + if (!(used_regs & (1 << i))) { + reg = i; + used_regs |= (1 << reg); + break; + } + } + if (i == registers) { + fprintf(stderr, "Out of registers?!? fix me\n"); + exit(1); + } + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0x45 + reg*8; + machine_code[mc_ptr++] = -regs[j].rbp; + } + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0xd3; + machine_code[mc_ptr++] = 0xe8 + reg; + + if (k < registers) used_regs &= ~(1 << k); break; case decl: reg = next_reg++; @@ -472,79 +857,706 @@ int compile_tree(FILE *output, struct expression *tree) regs[reg].rbp = next_rbp; next_rbp += 8; regs[reg].var = tree->args->str; - //printf("INIT R%i ; %s\n", reg, regs[reg]); break; case set: - reg = compile_tree(output, tree->args); - j = compile_tree(output, &tree->args[1]); + reg = compile_tree(output, tree->args, 0, -1); + j = compile_tree(output, &tree->args[1], 0, out_reg); + if (j < 0) { + fprintf(stderr, "No rvalue for assignment\n"); + exit(1); + } used_regs &= ~(1 << j); - out[0] = 0x48; - if (regs[reg].type == 1) { - out[1] = 0x89; - out[2] = 0x45 + 8*j; - out[3] = -regs[reg].rbp; - write(1, out, 4); - } - //printf("STR R%i R%i\n", reg, j); + if (reg >= registers) { + if (j >= registers) { + for (i = 0; i < registers; i++) { + if (!(used_regs & (1 << i))) { + k = i; + break; + } + } + if (i == registers) { + fprintf(stderr, "Out of registers?!? fix me\n"); + exit(1); + } + machine_code[mc_ptr++] = 0x48 | (k > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0x45 + (k % 8) * 8; + machine_code[mc_ptr++] = -regs[j].rbp; + + machine_code[mc_ptr++] = 0x48 | (k > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x89; + machine_code[mc_ptr++] = 0x45 + (k % 8) * 8; + machine_code[mc_ptr++] = -regs[reg].rbp; + } else { + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x89; + machine_code[mc_ptr++] = 0x45 + (j % 8) * 8; + machine_code[mc_ptr++] = -regs[reg].rbp; + } + } + if (discard_result) reg = -1; + break; + case ref: + if (tree->args->type != idnt) { + fprintf(stderr, "Unable to reference non-identifier\n"); + exit(1); + } + if (discard_result) break; + j = compile_tree(output, tree->args, 0, -1); + reg = out_reg; + if (out_reg < 0 || out_reg >= registers) { + for (i = 0; i < registers; i++) { + if (!(used_regs & (1 << i))) { + k = i; + break; + } + } + if (i == registers) { + fprintf(stderr, "Out of registers?!? fix me\n"); + exit(1); + } + } + used_regs |= (1 << reg); + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x8d; + machine_code[mc_ptr++] = 0x45 + (reg % 8) * 8; + machine_code[mc_ptr++] = -regs[j].rbp; + break; + case deref: break; case idnt: - for (i = 0; i < 1024; i++) { + if (discard_result) break; + for (i = registers; i < 1024; i++) { if (regs[i].type != 1) continue; if (!strcmp(regs[i].var, tree->str)) break; } - if (i == 1024) break; - reg = i; + if (i == 1024) { + fprintf(stderr, "Unable to locate identifier <%s>\n", tree->str); + exit(1); + } + if (out_reg != -1 && out_reg < registers) { + reg = out_reg; + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0x45 + (reg % 8) * 8; + machine_code[mc_ptr++] = -regs[i].rbp; + used_regs |= (1 << reg); + } else { + reg = i; + } break; case number: - for (i = 0; i < 16; i++) { - if (!(used_regs & (1 << i))) { - reg = i; - used_regs |= (1 << reg); - break; + if (discard_result) break; + if (out_reg == -1 || out_reg >= registers) { + for (i = 0; i < registers; i++) { + if (!(used_regs & (1 << i))) { + reg = i; + break; + } } + if (i == registers) { + fprintf(stderr, "Out of registers?!? fix me\n"); + exit(1); + } + } else { + reg = out_reg; } - if (reg < 0) { - fprintf(stderr, "Out of registers?!? fix me\n"); - exit(1); + used_regs |= (1 << reg); + if (tree->i) { + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0xb8 + (reg % 8); + *(int64_t *) &machine_code[mc_ptr] = tree->i; + mc_ptr += 8; + } else { + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 5 : 0); + machine_code[mc_ptr++] = 0x33; + machine_code[mc_ptr++] = 0xc0 + (reg % 8) + (reg % 8) * 8; } - out[0] = 0x48 | (reg > 7 ? 1 : 0); - out[1] = 0xb8 + (reg % 8); - *(int64_t *) &out[2] = tree->i; - write(1, out, 10); - //printf("IMME R%i %li\n", reg, tree->i); break; case string: - reg = next_reg++; - //printf("IMME R%i %li\n", reg, tree->str); + if (discard_result) break; + if (out_reg == -1 || out_reg >= registers) { + for (i = 0; i < registers; i++) { + if (!(used_regs & (1 << i))) { + reg = i; + break; + } + } + if (i == registers) { + fprintf(stderr, "Out of registers?!? fix me\n"); + exit(1); + } + } else { + reg = out_reg; + } + if (!tree->i) { + strcpy(&rodata[rd_ptr], tree->str); + tree->i = rodata_addr + rd_ptr; + rd_ptr += strlen(tree->str) + 1; + } + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0xb8 + (reg % 8); + *(int64_t *) &machine_code[mc_ptr] = tree->i; + mc_ptr += 8; + used_regs |= (1 << reg); + break; + case loop: + machine_code[mc_ptr++] = 0xe9; + k = mc_ptr; + //*(int32_t *) &machine_code[mc_ptr] = 0x00; + mc_ptr += 4; + compile_tree(output, &tree->args[1], 1, -1); + *(int32_t *) &machine_code[k] = mc_ptr - k - 4; + j = compile_tree(output, tree->args, 0, -1); + if (j >= registers) { + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x83; + machine_code[mc_ptr++] = 0x7d; + machine_code[mc_ptr++] = -regs[j].rbp; + } else { + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x83; + machine_code[mc_ptr++] = 0xf8 + (j % 8); + } + machine_code[mc_ptr++] = 0x00; + machine_code[mc_ptr++] = 0x0f; + machine_code[mc_ptr++] = 0x85; + *(int32_t *) &machine_code[mc_ptr] = -(mc_ptr - k); + mc_ptr += 4; + used_regs = (1 << 5) | (1 << 4); + break; + case ifnz: + j = compile_tree(output, tree->args, 0, -1); + if (j >= registers) { + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x83; + machine_code[mc_ptr++] = 0x7d; + machine_code[mc_ptr++] = -regs[j].rbp; + } else { + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x83; + machine_code[mc_ptr++] = 0xf8 + (j % 8); + } + machine_code[mc_ptr++] = 0x00; + machine_code[mc_ptr++] = 0x0f; + machine_code[mc_ptr++] = 0x84; + k = mc_ptr; + //*(int32_t *) &machine_code[mc_ptr] = 0x00; //Offset + mc_ptr += 4; + // True + used_regs = (1 << 5) | (1 << 4); + compile_tree(output, &tree->args[1], 1, -1); + if (tree->args_count == 3) { + machine_code[mc_ptr++] = 0xe9; + l = mc_ptr; + *(int32_t *) &machine_code[mc_ptr] = 0x00; + mc_ptr += 4; + // Else + *(int32_t *) &machine_code[k] = mc_ptr - k - 4; + compile_tree(output, &tree->args[2], 1, -1); + *(int32_t *) &machine_code[l] = mc_ptr - l - 4; + } else { + *(int32_t *) &machine_code[k] = mc_ptr - k - 4; + } + used_regs = (1 << 5) | (1 << 4); + break; + case l_not: + j = compile_tree(output, tree->args, discard_result, out_reg); + if (discard_result) { + if (j < registers) used_regs &= ~(1 << j); + break; + } + reg = j; + if (j >= registers) { + for (i = 0; i < registers; i++) { + if (!(used_regs & (1 << i))) { + reg = i; + used_regs |= (1 << reg); + break; + } + } + if (i == registers) { + fprintf(stderr, "Out of registers?!? fix me\n"); + exit(1); + } + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x83; + machine_code[mc_ptr++] = 0x7d; + machine_code[mc_ptr++] = -regs[j].rbp; + } else { + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x83; + machine_code[mc_ptr++] = 0xf8 + (reg % 8); + } + machine_code[mc_ptr++] = 0x00; + machine_code[mc_ptr++] = 0x74; + machine_code[mc_ptr++] = 0x05; + // Not zero + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 5 : 0); + machine_code[mc_ptr++] = 0x33; + machine_code[mc_ptr++] = 0xc0 + (reg % 8) + (reg % 8) * 8; + machine_code[mc_ptr++] = 0xeb; + machine_code[mc_ptr++] = 0x0a; + // Zero + machine_code[mc_ptr++] = 0x48 + (reg > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0xb8 + (reg % 8); + *(int64_t *) &machine_code[mc_ptr] = 1; + mc_ptr += 8; + break; + case l_lt: + j = compile_tree(output, tree->args, discard_result, out_reg); + k = compile_tree(output, &tree->args[1], discard_result, -1); + if (discard_result) { + if (j < registers) used_regs &= ~(1 << j); + if (k < registers) used_regs &= ~(1 << k); + break; + } + reg = k; + + if (j >= registers) { + if (k >= registers) { + for (i = 0; i < registers; i++) { + if (!(used_regs & (1 << i))) { + reg = i; + used_regs |= (1 << reg); + break; + } + } + if (i == registers) { + fprintf(stderr, "Out of registers?!? fix me\n"); + exit(1); + } + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0x45 + (reg % 8) * 8; + machine_code[mc_ptr++] = -regs[k].rbp; + } else { + //reg = k; + } + k = reg; + machine_code[mc_ptr++] = 0x48 | (k > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x39; + machine_code[mc_ptr++] = 0x45 + (k % 8) * 8; + machine_code[mc_ptr++] = -regs[j].rbp; + } else if (k >= registers) { + for (i = 0; i < registers; i++) { + if (!(used_regs & (1 << i))) { + reg = i; + used_regs |= (1 << reg); + break; + } + } + if (i == registers) { + fprintf(stderr, "Out of registers?!? fix me\n"); + exit(1); + } + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0x45 + (reg % 8) * 8; + machine_code[mc_ptr++] = -regs[k].rbp; + + k = reg; + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 1 : 0) | (k > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x39; + machine_code[mc_ptr++] = 0xc0 + (j % 8) + (k % 8) * 8; + } else { + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 1 : 0) | (k > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x39; + machine_code[mc_ptr++] = 0xc0 + (j % 8) + (k % 8) * 8; + used_regs &= ~(1 << k); + } + machine_code[mc_ptr++] = 0x7c; + machine_code[mc_ptr++] = 0x05; //Offset + // Not less than: + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 5 : 0); + machine_code[mc_ptr++] = 0x33; + machine_code[mc_ptr++] = 0xc0 + (reg % 8) + (reg % 8) * 8; + machine_code[mc_ptr++] = 0xeb; + machine_code[mc_ptr++] = 0x0a; + // Less than: + machine_code[mc_ptr++] = 0x48 + (reg > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0xb8 + (reg % 8); + *(int64_t *) &machine_code[mc_ptr] = 1; + mc_ptr += 8; break; case ret: - reg = compile_tree(output, tree->args); - out[0] = 0x48; - if (regs[reg].type == 1) { - out[1] = 0x8b; - out[2] = 0x45; - out[3] = -regs[reg].rbp; - write(1, out, 4); - } + reg = compile_tree(output, tree->args, 0, 0); + if (reg) { + if (reg >= registers) { + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0x45; + machine_code[mc_ptr++] = -regs[reg].rbp; + } else { + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0xC0 + (reg % 8); + } + } + machine_code[mc_ptr++] = 0xc9; + machine_code[mc_ptr++] = 0xc3; + break; + case writestring: + if (used_regs & (1 << 0)) { + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x50; + next_rbp += 8; + pushed |= (1 << 0); + } + if (used_regs & (1 << arg_order[0])) { + machine_code[mc_ptr++] = 0x48 | (arg_order[0] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x50 + (arg_order[0] % 8); + next_rbp += 8; + pushed |= (1 << arg_order[0]); + } + if (used_regs & (1 << arg_order[1])) { + machine_code[mc_ptr++] = 0x48 | (arg_order[1] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x50 + (arg_order[1] % 8); + next_rbp += 8; + pushed |= (1 << arg_order[1]); + } + if (used_regs & (1 << arg_order[2])) { + machine_code[mc_ptr++] = 0x48 | (arg_order[2] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x50 + (arg_order[2] % 8); + next_rbp += 8; + pushed |= (1 << arg_order[2]); + } + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0xb8; + *(int64_t *) &machine_code[mc_ptr] = 0x01; + mc_ptr += 8; + machine_code[mc_ptr++] = 0x48 | (arg_order[0] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x89; + machine_code[mc_ptr++] = 0xc0 + (arg_order[0] % 8); + /* + machine_code[mc_ptr++] = 0x48 | (arg_order[2] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x89; + machine_code[mc_ptr++] = 0xc0 + (arg_order[2] % 8); + */ + + used_regs |= (1 << 0) | (1 << arg_order[0]); + + compile_tree(output, tree->args, 0, arg_order[2]); + j = compile_tree(output, &tree->args[1], 0, arg_order[1]); /* - out[3] = 0x48; - out[4] = 0x48; - out[5] = 0x48; - out[6] = 0x48; - out[7] = 0x48; - out[8] = 0x48; - out[9] = 0x48; - out[10] = 0x48; - out[0] = 0x48; - out[0] = 0x48; - out[0] = 0x48; + if (used_regs & (1 << arg_order[1]) && j != arg_order[1]) { + machine_code[mc_ptr++] = 0x48 | (arg_order[1] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x50 + (arg_order[1] % 8); + next_rbp += 8; + pushed |= (1 << arg_order[1]); + } + if (j >= registers) { + k = 0; + machine_code[mc_ptr++] = 0x48 | (arg_order[1] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x8d; + machine_code[mc_ptr++] = 0x45 + (arg_order[1] % 8) * 8; + machine_code[mc_ptr++] = -regs[j].rbp; + } else { + k = 1; + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x89; + machine_code[mc_ptr++] = 0x45 + (j % 8) * 8; + machine_code[mc_ptr++] = -next_rbp; + + machine_code[mc_ptr++] = 0x48 | (arg_order[1] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x8d; + machine_code[mc_ptr++] = 0x45 + (arg_order[1] % 8) * 8; + machine_code[mc_ptr++] = -next_rbp; + used_regs &= ~(1 << j); + } */ + machine_code[mc_ptr++] = 0x0F; + machine_code[mc_ptr++] = 0x05; + + // Reset saved variables + if (pushed & (1 << arg_order[2])) { + machine_code[mc_ptr++] = 0x48 | (arg_order[2] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x58 + (arg_order[2] % 8); + next_rbp -= 8; + pushed &= ~(1 << arg_order[2]); + } + if (pushed & (1 << arg_order[1])) { + machine_code[mc_ptr++] = 0x48 | (arg_order[1] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x58 + (arg_order[1] % 8); + next_rbp -= 8; + pushed &= ~(1 << arg_order[1]); + } + if (pushed & (1 << arg_order[0])) { + machine_code[mc_ptr++] = 0x48 | (arg_order[0] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x58 + (arg_order[0] % 8); + next_rbp -= 8; + pushed &= ~(1 << arg_order[0]); + } + if (pushed & (1 << 0)) { + if (!discard_result) { + for (i = 0; i < registers; i++) { + if (!(used_regs & (1 << i))) { + reg = i; + used_regs |= (1 << reg); + break; + } + } + if (i == registers) { + fprintf(stderr, "Out of registers?!? fix me\n"); + exit(1); + } + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0xc0 + (reg % 8) * 8; + } + + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x58; + next_rbp -= 8; + pushed &= ~(1 << 0); + } + break; + case printchar: + if (used_regs & (1 << 0)) { + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x50; + next_rbp += 8; + pushed |= (1 << 0); + } + if (used_regs & (1 << arg_order[0])) { + machine_code[mc_ptr++] = 0x48 | (arg_order[0] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x50 + (arg_order[0] % 8); + next_rbp += 8; + pushed |= (1 << arg_order[0]); + } + if (used_regs & (1 << arg_order[2])) { + machine_code[mc_ptr++] = 0x48 | (arg_order[2] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x50 + (arg_order[2] % 8); + next_rbp += 8; + pushed |= (1 << arg_order[2]); + } + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0xb8; + *(int64_t *) &machine_code[mc_ptr] = 0x01; + mc_ptr += 8; + machine_code[mc_ptr++] = 0x48 | (arg_order[0] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x89; + machine_code[mc_ptr++] = 0xc0 + (arg_order[0] % 8); + machine_code[mc_ptr++] = 0x48 | (arg_order[2] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x89; + machine_code[mc_ptr++] = 0xc0 + (arg_order[2] % 8); + + used_regs |= (1 << 0) | (1 << arg_order[0]) | (1 << arg_order[2]); + + j = compile_tree(output, tree->args, 0, -1); + if (used_regs & (1 << arg_order[1]) && j != arg_order[1]) { + machine_code[mc_ptr++] = 0x48 | (arg_order[1] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x50 + (arg_order[1] % 8); + next_rbp += 8; + pushed |= (1 << arg_order[1]); + } + if (j >= registers) { + k = 0; + machine_code[mc_ptr++] = 0x48 | (arg_order[1] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x8d; + machine_code[mc_ptr++] = 0x45 + (arg_order[1] % 8) * 8; + machine_code[mc_ptr++] = -regs[j].rbp; + } else { + k = 1; + + // Allocate extra space on the stack for the character + // Because of scratch space, this might be unnecessary + /* + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x81; + machine_code[mc_ptr++] = 0xEC; + *(int *) &machine_code[mc_ptr] = 0x08; + mc_ptr += 4; + */ + + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x89; + machine_code[mc_ptr++] = 0x45 + (j % 8) * 8; + machine_code[mc_ptr++] = -next_rbp; + + machine_code[mc_ptr++] = 0x48 | (arg_order[1] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x8d; + machine_code[mc_ptr++] = 0x45 + (arg_order[1] % 8) * 8; + machine_code[mc_ptr++] = -next_rbp; + used_regs &= ~(1 << j); + } + machine_code[mc_ptr++] = 0x0F; + machine_code[mc_ptr++] = 0x05; + + // Reset saved variables + if (pushed & (1 << arg_order[1])) { + machine_code[mc_ptr++] = 0x48 | (arg_order[1] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x58 + (arg_order[1] % 8); + next_rbp -= 8; + pushed &= ~(1 << arg_order[1]); + } + if (pushed & (1 << arg_order[2])) { + machine_code[mc_ptr++] = 0x48 | (arg_order[2] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x58 + (arg_order[2] % 8); + next_rbp -= 8; + pushed &= ~(1 << arg_order[2]); + } + if (pushed & (1 << arg_order[0])) { + machine_code[mc_ptr++] = 0x48 | (arg_order[0] > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x58 + (arg_order[0] % 8); + next_rbp -= 8; + pushed &= ~(1 << arg_order[0]); + } + if (pushed & (1 << 0)) { + if (!discard_result) { + for (i = 0; i < registers; i++) { + if (!(used_regs & (1 << i))) { + reg = i; + used_regs |= (1 << reg); + break; + } + } + if (i == registers) { + fprintf(stderr, "Out of registers?!? fix me\n"); + exit(1); + } + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0xc0 + (reg % 8) * 8; + } + + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x58; + next_rbp -= 8; + pushed &= ~(1 << 0); + } + break; + case exit_p: + j = compile_tree(output, tree->args, 0, arg_order[0]); + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0xb8; + *(int64_t *) &machine_code[mc_ptr] = 0x3c; + mc_ptr += 8; + machine_code[mc_ptr++] = 0x0F; + machine_code[mc_ptr++] = 0x05; + break; } return reg; } +struct elf_hdr { + unsigned int magic; + unsigned char class; + unsigned char endianness; + unsigned char version; + unsigned char abi; + unsigned char abi_version; + unsigned char pad[7]; + unsigned short type; + unsigned short arch; + unsigned int version2; + unsigned long entry; + unsigned long phoff; + unsigned long shoff; + unsigned int flags; + unsigned short ehsize; + unsigned short phsize; + unsigned short phcount; + unsigned short shsize; + unsigned short shcount; + unsigned short shstrindx; +} __attribute((packed)); + +struct prg_hdr { + unsigned int type; + unsigned int flags; + unsigned long offset; + unsigned long vaddr; + unsigned long paddr; + unsigned long fsize; + unsigned long msize; + unsigned long align; +} __attribute((packed)); + +struct sec_hdr { + unsigned int name; + unsigned int type; + unsigned long flags; + unsigned long addr; + unsigned long offset; + unsigned long size; + unsigned int link; + unsigned int info; + unsigned long align; + unsigned long esize; +} __attribute((packed)); + +struct elf_hdr elf_hdr = { 0x464c457f, 2, 1, 1, 0, 0, { 0 }, 2, 0x3e, 1, 0, 0x40, 0, 0, 0x40, 0x38, 0, 0x40, 0, 0 }; +struct prg_hdr load_text = { 1, 5, 0, 0, 0, 0, 0, 1 }; +struct prg_hdr load_rodata = { 1, 4, 0, 0, 0, 0, 0, 1 }; +struct sec_hdr null_section = { 0 }; +struct sec_hdr text_section = { 1, 1, 6, 0, 0, 0, 0, 0, 1, 0 }; +struct sec_hdr rodata_section = { 7, 1, 2, 0, 0, 0, 0, 0, 1, 0 }; +struct sec_hdr strtab_section = { 15, 3, 0x20, 0, 0, 0, 0, 0, 1, 0 }; +char strtab[] = "\0.text\0.rodata\0.strtab"; + int main(int argc, char **argv) { + int i; + char hdr1[] = { // ELF Header + 0x7f, 'E' , 'L' , 'F' , 0x02, 0x01, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x02, 0x00, 0x3E, 0x00, 0x01, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x38, 0x00, 0x02, 0x00, 0x40, 0x00, 0x03, 0x00, 0x02, 0x00, + // LOAD TEXT + 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xb0, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + // LOAD RODATA + 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xb0, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* + // GNU_STACK + 0x51, 0xe5, 0x74, 0x64, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + */ + }; + char phdr[] = { // Program Headers + // LOAD TEXT + 0x01, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xb0, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, + // LOAD RODATA + 0x01, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xb0, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0xb0, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x04, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }; + char hdr2[] = { // Section Header + // NULL shdr + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + // TEXT shdr + 0x01, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xb0, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x5f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + /* + // RODATA shdr + 0x0f, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x5f, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x10, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + */ + // STRTAB shdr + 0x07, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xcf, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x0e, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + }; if (argc == 1) fname = "stdin"; else { fname = argv[1]; @@ -552,6 +1564,91 @@ int main(int argc, char **argv) } yyparse(); breakpoint(); - //stringify(final, -3, 0, NULL, 0); - compile_tree(stdout, final); + stringify(final, -3, 0, NULL, 0); + //start_addr = 0x400000 + sizeof(hdr1); + + elf_hdr.phcount = 2; + elf_hdr.shcount = 4; + elf_hdr.shstrindx = elf_hdr.shcount - 1; + + int hdr1_size = sizeof(elf_hdr) + sizeof(struct prg_hdr) * elf_hdr.phcount; + start_addr = 0x400000 + hdr1_size; + rodata_addr = 0x600000 + sizeof(hdr1) + 0xbb; + fprintf(stderr, "final args: %i\n", final->args_count); + for (i = 0; i < final->args_count; i++) { + compile_tree(stdout, &final->args[i], 1, -1); + } + if (entry_addr < 0) { + fprintf(stderr, "No _start symbol found!\n"); + exit(1); + } + *(int64_t *) &hdr1[0x18] = start_addr + entry_addr; + *(int64_t *) &hdr1[0x28] = sizeof(hdr1) + mc_ptr + rd_ptr; + elf_hdr.entry = start_addr + entry_addr; + elf_hdr.shoff = hdr1_size + mc_ptr + rd_ptr; + + *(int64_t *) &hdr1[0x48] = 0;//sizeof(hdr1); + *(int64_t *) &hdr1[0x50] = start_addr - sizeof(hdr1); + *(int64_t *) &hdr1[0x58] = start_addr - sizeof(hdr1); + *(int64_t *) &hdr1[0x60] = rd_ptr + mc_ptr + sizeof(hdr1); + *(int64_t *) &hdr1[0x68] = rd_ptr + mc_ptr + sizeof(hdr1); + load_text.offset = 0; + load_text.vaddr = 0x400000; + load_text.paddr = 0x400000; + load_text.fsize = hdr1_size + mc_ptr; + load_text.msize = hdr1_size + mc_ptr; + + *(int64_t *) &hdr1[0x80] = sizeof(hdr1) + mc_ptr; + *(int64_t *) &hdr1[0x88] = rodata_addr; + *(int64_t *) &hdr1[0x90] = rodata_addr; + *(int64_t *) &hdr1[0x98] = rd_ptr; + *(int64_t *) &hdr1[0xa0] = rd_ptr; + load_rodata.offset = hdr1_size + mc_ptr; + load_rodata.vaddr = 0x600000; + load_rodata.paddr = 0x600000; + //load_rodata.fsize = rd_ptr; + load_rodata.fsize = rd_ptr; + load_rodata.msize = rd_ptr + load_rodata.offset; + + *(int64_t *) &hdr2[0x50] = start_addr; + *(int64_t *) &hdr2[0x58] = sizeof(hdr1); + *(int64_t *) &hdr2[0x60] = mc_ptr; + /* + *(int64_t *) &hdr2[0x90] = rodata_addr; + *(int64_t *) &hdr2[0x98] = sizeof(hdr1) + mc_ptr; + *(int64_t *) &hdr2[0xa0] = rd_ptr; + *(int64_t *) &hdr2[0xd8] = sizeof(hdr1) + mc_ptr + rd_ptr + sizeof(hdr2); + *(int64_t *) &hdr2[0xe0] = sizeof(strtab); + */ + *(int64_t *) &hdr2[0x98] = sizeof(hdr1) + mc_ptr + rd_ptr + sizeof(hdr2); + *(int64_t *) &hdr2[0xa0] = sizeof(strtab); + text_section.addr = start_addr; + text_section.offset = hdr1_size; + text_section.size = mc_ptr; + + rodata_section.addr = load_rodata.vaddr; + rodata_section.offset = load_rodata.offset; + rodata_section.size = rd_ptr; + + //strtab_section.addr = 0; + strtab_section.offset = hdr1_size + mc_ptr + rd_ptr + sizeof(struct sec_hdr) * elf_hdr.shcount; + strtab_section.size = sizeof(strtab); + + write(1, &elf_hdr, sizeof(elf_hdr)); + write(1, &load_text, sizeof(load_text)); + write(1, &load_rodata, sizeof(load_rodata)); + write(1, machine_code, mc_ptr); + write(1, rodata, rd_ptr); + write(1, &null_section, sizeof(null_section)); + write(1, &text_section, sizeof(text_section)); + write(1, &rodata_section, sizeof(rodata_section)); + write(1, &strtab_section, sizeof(strtab_section)); + write(1, strtab, sizeof(strtab)); + /* + write(1, hdr1, sizeof(hdr1)); + write(1, machine_code, mc_ptr); + write(1, rodata, rd_ptr); + write(1, hdr2, sizeof(hdr2)); + write(1, strtab, sizeof(strtab)); + */ }