compiler

Unnamed repository; edit this file 'description' to name the repository.
git clone https://git.deepztream.com/compiler
Log | Files | Refs

commit 42e661f0b1f075a0920042cad22b4933d35e541b
parent 4f394f9ac8ea9732fd67d8a360a52ad1c229075a
Author: William Djupström <william@deepztream.com>
Date:   Mon, 25 Mar 2019 10:09:57 +0000

Added more features and optimizations

Diffstat:
M lex.l   |  54 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
M parse.y | 975 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++--------
2 files changed, 939 insertions(+), 90 deletions(-)

diff --git a/lex.l b/lex.l @@ -1,6 +1,43 @@ %{ #include "parse.tab.h" extern int line; + +int is_digit(char c, int base) +{ + if (base == 2) return (c == '0' && c == '1'); + if (base == 8) return (c >= '0' && c <= '7'); + if (base == 10) return (c >= '0' && c <= '9'); + if (base == 16) { + return (c >= '0' && c <= '9' || (c | 32) >= 'a' && (c | 32) <= 'z'); + } +} + +long to_long(char *str) +{ + long r = 0; + int i; + int base = 10; + int len; + if (*str == '0' && str[1] == 'b') base = 2, str += 2; + else if (*str == '0' && str[1] == 'o') base = 8, str += 2; + else if (*str == '0' && str[1] == 'x') base = 16, str += 2; + for (len = 0; is_digit(str[len], base); len++); + for (i = 0; i < len; i++) { + r *= base; + switch (base) { + case 2: case 8: case 10: + r += str[i] - '0'; + break; + case 16: + if (str[i] >= '0' && str[i] <= '9') + r += str[i] - '0'; + else + r += (str[i] & ~(1 << 5)) - 'A' + 10; + break; + } + } + return r; +} %} %% @@ -12,6 +49,14 @@ extern int line; "exit" {return EXIT;} "putc" {return PUTC;} "write" {return WRITE;} +"read" {return READ;} +"brk" {return BRK;} +"_syscall1" {return SYSCALL1;} +"_syscall2" {return SYSCALL2;} +"_syscall3" {return SYSCALL3;} +"_syscall4" {return SYSCALL4;} +"_syscall5" {return SYSCALL5;} +"_syscall6" {return SYSCALL6;} "++" {return INC;} "--" {return DEC;} @@ -34,12 +79,21 @@ extern int line; "<<" {return SHIFT_L;} ">>" {return SHIFT_R;} +0x[0-9a-zA-Z.]+ {yylval.i = to_long(yytext); return NUMBER;} [0-9.]+ {yylval.i = atol(yytext); return NUMBER;} [a-zA-Z_][a-zA-Z0-9_]* {yylval.str = calloc(strlen(yytext)+1, 1); strcpy(yylval.str, yytext); return IDENTIFIER;} \"[^\"]*\" {yytext[strlen(yytext) - 1] = 0; yylval.str = calloc(strlen(yytext), 1); strcpy(yylval.str, yytext+1); return STRING;} +'.' 
{yylval.i = yytext[1]; return NUMBER;} +'\\0' {yylval.i = '\0'; return NUMBER;} +'\\t' {yylval.i = '\t'; return NUMBER;} +'\\n' {yylval.i = '\n'; return NUMBER;} +'\\r' {yylval.i = '\r'; return NUMBER;} +'\\\'' {yylval.i = '\''; return NUMBER;} [ \t]+ {;} "\r\n" {line++;} [\r\n] {line++;} +\/\/[^\n]* {;} +[/][*].*[*][/] {;} . {return *yytext;} %% /* diff --git a/parse.y b/parse.y @@ -35,7 +35,8 @@ enum id_type { f(shift_l) f(shift_r) \ f(set) f(decl) \ f(ifnz) f(loop) \ - f(printchar) f(writestring) \ + f(printchar) f(writestring) f(readstring)\ + f(brk_sys) f(_syscall) \ f(function) f(fcall) f(ret) f(exit_p) enum e_type { @@ -50,6 +51,7 @@ struct expression { int constant; int declares; char *str; + char flags; int64_t i; size_t args_count; struct expression *args; @@ -62,8 +64,12 @@ struct identifiers{ struct identifiers *ids = NULL; struct expression *defid(char *name) { + int i; if (!ids) ids = calloc(sizeof(struct identifiers), 1); + for (i = 0; i < ids->count; i++) { + if (!strcmp(ids->identifiers[i].str, name)) return &ids->identifiers[i]; + } ids->count++; ids->identifiers = realloc(ids->identifiers, sizeof(struct expression) * ids->count); @@ -128,7 +134,7 @@ struct expression *e(enum e_type type, struct expression *arg1, struct expressio if (arg1) memmove(r->args, arg1, sizeof(struct expression)); if (arg2) memmove(&r->args[arg1 ? 
1 : 0], arg2, sizeof(struct expression)); } - if (!nofree && arg1 && !arg1->constant) free(arg1); + //if (!nofree && arg1 && !arg1->constant) free(arg1); if (!nofree && arg2 && !arg2->constant) free(arg2); return r; } @@ -245,7 +251,12 @@ char *escape_str(char *str) { %token ADD_EQ "+=" SUB_EQ "-=" MUL_EQ "*=" DIV_EQ "/=" MOD_EQ "%=" %token OR_EQ "|=" AND_EQ "&=" XOR_EQ "^=" SHL_EQ "<<=" SHR_EQ ">>=" %token RETURN "return" IF "if" ELSE "else" WHILE "while" DECL "decl" -%token EXIT "exit" PUTC "putc" WRITE "write" +%token EXIT "exit" PUTC "putc" WRITE "write" READ "read" BRK "brk" +%token SYSCALL1 "_syscall1" SYSCALL2 "_syscall2" SYSCALL3 "_syscall3" +%token SYSCALL4 "_syscall4" SYSCALL5 "_syscall5" SYSCALL6 "_syscall6" + +%nonassoc "if" +%nonassoc "else" %left ',' %right '?' ':' '=' "+=" "-=" "*=" "/=" "%=" "&=" "|=" "^=" "<<=" ">>=" @@ -262,8 +273,8 @@ char *escape_str(char *str) { %right REF NEG "++" "--" '!' '~' %left '(' '[' -%type <expr> expression exprs c_expr var_def var_defs statement comp_stmt if_stmt else_stmt -%type <expr> parameters parameter identifier program function +%type <expr> expression exprs c_expr var_def var_defs statement comp_stmt if_stmt +%type <expr> parameters parameter identifier program function ifelse %type <str> str %% @@ -285,8 +296,7 @@ parameter: parameter ',' parameter {$$ = e(comma, $1, $3, 0);} statement: "return" exprs ';' {$$ = e(ret, $2, NULL, 0);} | "while" '(' expression ')' statement {$$ = e(loop, $3, $5, 0);} -| else_stmt {$$ = move($1);} -| if_stmt {$$ = move($1);} +| ifelse {$$ = move($1);} | exprs ';' {$$ = move($1);} | "exit" ';' {$$ = e(exit_p, ZERO, NULL, 0);} | "exit" expression ';' {$$ = e(exit_p, $2, NULL, 0);} @@ -294,7 +304,10 @@ statement: "return" exprs ';' {$$ = e(ret, $2, NULL, 0);} | ';' {$$ = e(nop, NULL, NULL, 1);} ; -else_stmt: if_stmt "else" statement {$$ = move($1); append($$, $3);} +; + +ifelse: if_stmt "else" statement %prec "else" {$$ = move($1); append($$, $3);} +| if_stmt %prec "if" {$$ = 
move($1);} ; if_stmt: "if" '(' expression ')' statement {$$ = e(ifnz, $3, $5, 0);} @@ -304,7 +317,7 @@ comp_stmt: '{' {$$ = e(comma, NULL, NULL, 1); | comp_stmt statement {$$ = move($1); append($$, $2);} ; -var_defs: "decl" var_def {$$ = e(comma, $2, NULL, 0);} +var_defs: "decl" var_def {$$ = move($2);} | var_defs ',' var_def {$$ = e(comma, $1, $3, 0);} ; @@ -323,7 +336,7 @@ c_expr: expression {$$ = move($1);} | c_expr ',' expression {$$ = e(comma, $1, $3, 0);} ; -expression: IDENTIFIER {$$ = copy(find_idnt($1));} +expression: IDENTIFIER {$$ = copy(defid($1)); /*$$ = copy(find_idnt($1));*/} | NUMBER {$$ = e(number, (void *)$1, NULL, 1);} | str {$$ = e(string, (void *)expand_str($1), NULL, 0);} | expression '[' expression ']' {$$ = e(deref, e(add, $1, $3, 0), NULL, 0);} @@ -331,44 +344,97 @@ expression: IDENTIFIER {$$ = copy(find_idnt($1));} | expression '(' ')' {$$ = e(fcall, $1, NULL, 0);} | expression '(' c_expr ')' {$$ = e(fcall, $1, $3, 0);} | "putc" '(' expression ')' {$$ = e(printchar, $3, NULL, 0);} -| "write" '(' expression ',' expression ')' {$$ = e(writestring, $3, $5, 0);} +| "write" '(' expression ',' expression ',' expression ')' {$$ = e(writestring, e(comma, e(comma, $3, $5, 0), $7, 0), NULL, 0);} +| "read" '(' expression ',' expression ',' expression ')' {$$ = e(readstring, e(comma, e(comma, $3, $5, 0), $7, 0), NULL, 0);} +| "brk" '(' expression ')' {$$ = e(brk_sys, $3, NULL, 0);} +| "_syscall1" '(' expression ',' + expression ')' {$$ = e(_syscall, $3, $5, 0);} + +| "_syscall2" '(' expression ',' + expression ',' + expression ')' {$$ = e(_syscall, $3, e(comma, $5, $7, 0), 0);} + +| "_syscall3" '(' expression ',' + expression ',' + expression ',' + expression ')' {$$ = e(_syscall, $3, e(comma, e(comma, $5, $7, 0), $9, 0), 0);} + +| "_syscall4" '(' expression ',' + expression ',' + expression ',' + expression ',' + expression ')' {$$ = e(_syscall, $3, e(comma, e(comma, e(comma, $5, $7, 0), $9, 0), $11, 0), 0);} + +| "_syscall5" '(' expression ',' + 
expression ',' + expression ',' + expression ',' + expression ',' + expression ')' {$$ = e(_syscall, $3, e(comma, e(comma, e(comma, e(comma, $5, $7, 0), $9, 0), $11, 0), $13, 0), 0);} + +| "_syscall6" '(' expression ',' + expression ',' + expression ',' + expression ',' + expression ',' + expression ',' + expression ')' {$$ = e(_syscall, $3, e(comma, e(comma, e(comma, e(comma, e(comma, $5, $7, 0), $9, 0), $11, 0), $13, 0), $15, 0), 0);} + | expression '+' expression {$$ = e(add, $1, $3, 0);} -| expression '-' expression {$$ = e(add, $1, e(neg, $3, NULL, 0), 0);} +| expression '-' expression {if ($3->type == number) { + $$ = e(add, $1, $3, 0); + $$->args[1].i = -$$->args[1].i; + } else { + $$ = e(add, $1, e(neg, $3, NULL, 0), 0); + }} | expression '*' expression {$$ = e(mul, $1, $3, 0);} | expression '/' expression {$$ = e(divide, $1, $3, 0);} | expression '%' expression {$$ = e(mod, $1, $3, 0);} | expression '&' expression {$$ = e(b_and, $1, $3, 0);} -| expression '|' expression {printf("uh-oh\n");$$ = NULL;} -| expression '^' expression {printf("uh-oh\n");$$ = NULL;} +| expression '|' expression {$$ = e(b_or, $1, $3, 0);} +| expression '^' expression {$$ = e(b_xor, $1, $3, 0);} | expression "<<" expression {$$ = e(shift_l, $1, $3, 0);} | expression ">>" expression {$$ = e(shift_r, $1, $3, 0);} | expression '=' expression {$$ = e(set, $1, $3, 0);} | expression "+=" expression {$$ = e(set, $1, e(add, $1, $3, 0), 0);} -| expression "-=" expression {$$ = e(set, $1, e(add, $1, e(neg, $3, NULL, 0), 0), 0);} +| expression "-=" expression {if ($3->type == number) { + $$ = e(add, $1, $3, 0); + $$->args[1].i = -$$->args[1].i; + $$ = e(set, $1, $$, 0); + } else { + $$ = e(set, $1, e(add, $1, e(neg, $3, NULL, 0), 0), 0); + }} | expression "*=" expression {$$ = e(set, $1, e(mul, $1, $3, 0), 0);} | expression "/=" expression {$$ = e(set, $1, e(divide, $1, $3, 0), 0);} | expression "%=" expression {$$ = e(set, $1, e(mod, $1, $3, 0), 0);} | expression "&=" expression {$$ = e(set, 
$1, e(b_and, $1, $3, 0), 0);} -| expression "|=" expression {printf("uh-oh\n");$$ = NULL;} -| expression "^=" expression {printf("uh-oh\n");$$ = NULL;} +| expression "|=" expression {$$ = e(set, $1, e(b_or, $1, $3, 0), 0);} +| expression "^=" expression {$$ = e(set, $1, e(b_xor, $1, $3, 0), 0);} | expression "<<=" expression {$$ = e(set, $1, e(shift_l, $1, $3, 0), 0);} | expression ">>=" expression {$$ = e(set, $1, e(shift_r, $1, $3, 0), 0);} | expression "==" expression {$$ = e(l_eq, $1, $3, 0);} | expression "!=" expression {$$ = e(l_not, e(l_eq, $1, $3, 0), NULL, 0);} | expression '<' expression {$$ = e(l_lt, $1, $3, 0);} | expression '>' expression {$$ = e(l_lt, $3, $1, 0);} -| expression "<=" expression {$$ = e(l_not, e(l_lt, $3, $1, 0), NULL, 0);} -| expression ">=" expression {$$ = e(l_not, e(l_lt, $1, $3, 0), NULL, 0);} +| expression "<=" expression {$$ = e(l_not, e(l_lt, $1, $3, 0), NULL, 0);} +| expression ">=" expression {$$ = e(l_not, e(l_lt, $3, $1, 0), NULL, 0);} +| expression "&&" expression {$$ = e(l_and, $1, $3, 0);} +| expression "||" expression {$$ = e(l_or, $1, $3, 0);} | '!' 
expression {$$ = e(l_not, $2, NULL, 0);} | '~' expression {printf("uh-oh\n");$$ = NULL;} | '*' expression %prec REF {$$ = e(deref, $2, NULL, 0);} | '&' expression %prec REF {$$ = e(ref, $2, NULL, 0);} -| '-' expression %prec NEG {$$ = e(neg, $2, NULL, 0);} +| '-' expression %prec NEG {if ($2->type == number) { + $$ = $2; + $$->i = -$$->i; + } else { + $$ = e(neg, $2, NULL, 0); + }} | '+' expression %prec NEG {printf("uh-oh\n");$$ = NULL;} | "++" expression {$$ = e(set, $2, e(add, $2, ONE, 0), 0);} | "--" expression {$$ = e(set, $2, e(add, $2, e(neg, ONE, NULL, 0), 0), 0);} -| expression "++" {printf("uh-oh\n");$$ = NULL;} -| expression "--" {printf("uh-oh\n");$$ = NULL;} +| expression "++" {printf("uh-oh ++\n");$$ = NULL;} +| expression "--" {printf("uh-oh --\n");$$ = NULL;} ; str: str STRING {$$ = calloc(strlen($1) + strlen($2) + 1, 1); strcpy($$, $1); strcpy(&$$[strlen($$)], $2); free($1); free($2);} @@ -385,14 +451,51 @@ EXPRESSION_TYPES(f) #undef f }; +int has_sideeffect(struct expression *n) +{ + int i, r = 0; + switch (n->type) { + case nop: case number: case idnt: case string: + return 0; + case set: case decl: case ret: + return 1; + case brk_sys: case printchar: case writestring: + case readstring: case _syscall: case exit_p: + return 2; + case neg: case l_not: case b_not: case ref: case deref: + return has_sideeffect(n->args); + case add: case mul: case divide: case mod: case b_and: case b_or: case b_xor: + case l_and: case l_or: case l_lt: case l_eq: case shift_l: case shift_r: + case loop: case comma: case ifnz: + for (i = 0; i < n->args_count; i++) + r |= has_sideeffect(&n->args[i]); + return r; + default: + return 1; + } +} + void stringify(struct expression *e, int p_indent, int last, char *name, uint64_t idmap) { int i, indent = p_indent + 3; char *str; if (last) idmap |= (1 << p_indent/3); for (i = 0; i < p_indent; i+=3) fprintf(stderr, "%s ", (idmap & (1 << (i/3))) ? " " : "│"); - if (p_indent >= 0) fprintf(stderr, "%s━ ", last ? 
"┕": "┝"); + if (p_indent >= 0) fprintf(stderr, "%s- ", last ? "-": "+"); switch(e->type) { + case function: + fprintf(stderr, "%s (", e->args->str); + if (e->args_count == 3) { + if (e->args[1].type == comma) { + for (i = 0; i < e->args[1].args_count; i++) + fprintf(stderr, "%s%s", e->args[1].args[i].str, i+1 == e->args[1].args_count ? "" : ", "); + } else { + fprintf(stderr, "%s", e->args[1].str); + } + } + fprintf(stderr, "):\n"); + stringify(&e->args[e->args_count-1], indent, 1, NULL, idmap); + break; case string: str = escape_str(e->str); fprintf(stderr, "%s: \"%s\"\n", name ? name : "string", str); @@ -405,7 +508,7 @@ void stringify(struct expression *e, int p_indent, int last, char *name, uint64_ fprintf(stderr, "%s: %li\n", name ? name : "number", e->i); break; default: - fprintf(stderr, "%s:\n", exp_str[e->type]); + fprintf(stderr, "%s: (pure: %i)\n", exp_str[e->type], !has_sideeffect(e)); for (i = 0; i < e->args_count;) { stringify(&e->args[i++], indent, i+1 == e->args_count, NULL, idmap); } @@ -438,7 +541,7 @@ int next_reg; uint16_t used_regs; int next_rbp; -char machine_code[8192] = { 0 }; +char machine_code[16384] = { 0 }; int mc_ptr = 0; char rodata[8192] = { 0 }; int rd_ptr = 0; @@ -448,7 +551,8 @@ int func_declares; int arg_order[16] = {7, 6, 2, 1, 8, 9, 1, 0, 11, 12, 13, 14, 15, 3, 5}; int syscall_arg[16] = {7, 6, 2, 10, 8, 9, -1, -1, -1, -1, -1, -1, -1, -1, -1}; -int caller_save = 0b0000111111001111; +//int caller_save = 0b0000111111001111; +int caller_save = 0b1111111111001111; int get_free_reg(int mark_used) { @@ -496,6 +600,47 @@ void pop(int reg, int *pushed) { reg_hist[reg].type = 0; } +struct func_template { + char *fname; + int fptr; +}; + +struct func_template *ftemp; +int ftemp_len = 0; + +void func_append(char *fname, int ptr) +{ + int i, j; + for (i = 0; i < ftemp_len; i++) + if (!strcmp(ftemp[i].fname, fname)) break; + if (i == ftemp_len) { + ftemp = realloc(ftemp, sizeof(struct func_template) * ++ftemp_len); + ftemp[i].fname = 
malloc(strlen(fname) + 1); + strcpy(ftemp[i].fname, fname); + ftemp[i].fptr = 0; + } + if (!ftemp[i].fptr) { + ftemp[i].fptr = ptr; + } else { + for (j = ftemp[i].fptr; *((int *) &machine_code[j]); j = *((int *) &machine_code[j])); + *((int *) &machine_code[j]) = ptr; + } +} + +void func_retrofit(char *fname, int ptr) +{ + int i, j, k; + for (i = 0; i < ftemp_len; i++) + if (!strcmp(ftemp[i].fname, fname)) break; + if (i == ftemp_len) return; + + for (j = ftemp[i].fptr, k = 1; k; j = k) { + k = *((int *) &machine_code[j]); + *((int *) &machine_code[j]) = ptr - j - 4; + } + ftemp[i].fptr = 0; +} + int compile_tree(FILE *output, struct expression *tree, int discard_result, int out_reg) { int i, j, k, l, reg = -1; @@ -505,7 +650,6 @@ int compile_tree(FILE *output, struct expression *tree, int discard_result, int -1, -1, -1, -1 }; int pushed = 0; struct expression *tmp; - //fprintf(stderr, "Type: %s\n", exp_str[tree->type]); switch (tree->type) { case function: next_reg = registers; @@ -513,6 +657,8 @@ int compile_tree(FILE *output, struct expression *tree, int discard_result, int next_rbp = 8; tree->i = mc_ptr; find_idnt(tree->args->str)->i = mc_ptr + start_addr; + find_idnt(tree->args->str)->flags |= 1; + func_retrofit(tree->args->str, mc_ptr); memset(reg_hist, 0, sizeof(reg_hist)); if (!strcmp(tree->args->str, "_start")) { entry_addr = mc_ptr; @@ -570,9 +716,9 @@ int compile_tree(FILE *output, struct expression *tree, int discard_result, int reg_hist[arg_order[0]].rev = ++regs[registers].rev; } else if (tree->args[1].type == comma && tree->args[1].args->type == idnt && tree->args[1].args->id_type == parameter) { for (i = 0; i < tree->args[1].args_count; i++) { - machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x48 | (arg_order[i] > 7 ? 
4 : 0); machine_code[mc_ptr++] = 0x89; - machine_code[mc_ptr++] = 0x45 + 8*arg_order[i]; + machine_code[mc_ptr++] = 0x45 + (arg_order[i] % 8) * 8; machine_code[mc_ptr++] = -regs[registers + i].rbp; reg_hist[arg_order[i]].type = 1; reg_hist[arg_order[i]].reg = registers + i; @@ -608,9 +754,14 @@ int compile_tree(FILE *output, struct expression *tree, int discard_result, int j = compile_tree(output, &tree->args[1], 0, arg_order[0]); } } - j = find_idnt(tree->args->str)->i; machine_code[mc_ptr++] = 0xe8; - *(int32_t *) &machine_code[mc_ptr] = j - start_addr - mc_ptr - 4; + if (find_idnt(tree->args->str)->flags & 1) { + j = find_idnt(tree->args->str)->i - start_addr - mc_ptr - 4; + } else { + func_append(tree->args->str, mc_ptr); + j = 0; + } + *(int32_t *) &machine_code[mc_ptr] = j; mc_ptr += 4; if (tree->args[1].type == comma) @@ -626,7 +777,29 @@ int compile_tree(FILE *output, struct expression *tree, int discard_result, int } break; case add: - j = compile_tree(output, tree->args, discard_result, -1); + if (!discard_result && + (tree->args->type == idnt || tree->args[1].type == idnt)) { + if (tree->args->type == idnt) { + j = compile_tree(output, tree->args, discard_result, -1); + tmp = &tree->args[1]; + } else { + j = compile_tree(output, &tree->args[1], discard_result, -1); + tmp = tree->args; + } + if (j == out_reg && tmp->type == number && + tmp->i < (long) 0x80000000 && tmp->i >= (long) -0x80000000) { + reg = j; + k = (int) tmp->i; + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x81; + machine_code[mc_ptr++] = 0x45; + *(int32_t *) &machine_code[mc_ptr] = k; + mc_ptr += 4; + break; + } + } else { + j = compile_tree(output, tree->args, discard_result, -1); + } k = compile_tree(output, &tree->args[1], discard_result, -1); if (discard_result) { if (j >= 0 && j < registers) used_regs &= ~(1 << j); @@ -659,7 +832,7 @@ int compile_tree(FILE *output, struct expression *tree, int discard_result, int } else { machine_code[mc_ptr++] = 0x48 | (j > 7 ? 
4 : 0) | (k > 7 ? 1 : 0); machine_code[mc_ptr++] = 0x03; - machine_code[mc_ptr++] = 0xC0 + (j % 8) * 8 + k; + machine_code[mc_ptr++] = 0xC0 + (j % 8) * 8 + (k % 8); reg = j, l = k; } } else { @@ -689,7 +862,7 @@ int compile_tree(FILE *output, struct expression *tree, int discard_result, int } else { machine_code[mc_ptr++] = 0x48 | (j > 7 ? 4 : 0) | (k > 7 ? 1 : 0); machine_code[mc_ptr++] = 0x03; - machine_code[mc_ptr++] = 0xC0 + (j % 8) * 8 + k; + machine_code[mc_ptr++] = 0xC0 + (j % 8) * 8 + (k % 8); if (reg_hist[j].type & 2 && reg_hist[k].type & 2) { reg_hist[j].type = 2; reg_hist[j].val += reg_hist[k].val; @@ -727,6 +900,94 @@ int compile_tree(FILE *output, struct expression *tree, int discard_result, int if (k < registers) used_regs &= ~(1 << k); break; } + if (out_reg < 0 || out_reg != j && out_reg != k) { + if (k >= registers) { + if (j >= registers) { + reg = out_reg; + if (out_reg < 0 || out_reg >= registers) + reg = get_free_reg(1); + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0x45 + (reg % 8) * 8; + machine_code[mc_ptr++] = -regs[j].rbp; + j = reg; + } + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x0F; + machine_code[mc_ptr++] = 0xAF; + machine_code[mc_ptr++] = 0x45 + (j % 8) * 8; + machine_code[mc_ptr++] = -regs[k].rbp; + reg = j, l = k; + } else if (j >= registers) { + machine_code[mc_ptr++] = 0x48 | (k > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x0F; + machine_code[mc_ptr++] = 0xAF; + machine_code[mc_ptr++] = 0x45 + (k % 8) * 8; + machine_code[mc_ptr++] = -regs[j].rbp; + reg = k, l = j; + } else { + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 4 : 0) | (k > 7 ? 
1 : 0); + machine_code[mc_ptr++] = 0x0F; + machine_code[mc_ptr++] = 0xAF; + machine_code[mc_ptr++] = 0xC0 + (j % 8) * 8 + (k % 8); + reg = j, l = k; + } + } else { + if (out_reg == k) { + l = j; + j = k; + k = l; + } + if (j >= registers) { + reg = get_free_reg(1); + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0x45 + (reg % 8) * 8; + machine_code[mc_ptr++] = -regs[j].rbp; + j = reg; + if (k >= registers) { + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x0F; + machine_code[mc_ptr++] = 0xAF; + machine_code[mc_ptr++] = 0x45 + (j % 8) * 8; + machine_code[mc_ptr++] = -regs[k].rbp; + } else { + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 4 : 0) | (k > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x0F; + machine_code[mc_ptr++] = 0xAF; + machine_code[mc_ptr++] = 0xC0 + (j % 8) * 8 + (k % 8); + //machine_code[mc_ptr++] = -regs[j].rbp; + } + } else if (k >= registers) { + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x0F; + machine_code[mc_ptr++] = 0xAF; + machine_code[mc_ptr++] = 0x45 + (j % 8) * 8; + machine_code[mc_ptr++] = -regs[k].rbp; + } else { + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 4 : 0) | (k > 7 ? 
1 : 0); + machine_code[mc_ptr++] = 0x0F; + machine_code[mc_ptr++] = 0xAF; + machine_code[mc_ptr++] = 0xC0 + (j % 8) * 8 + (k % 8); + if (reg_hist[j].type & 2 && reg_hist[k].type & 2) { + reg_hist[j].type = 2; + reg_hist[j].val += reg_hist[k].val; + } else if (reg_hist[j].type & 2) { + reg_hist[j].type = 0; + } + } + reg = j, l = k; + } + if (l < registers) used_regs &= ~(1 << l); + break; + /* + j = compile_tree(output, tree->args, discard_result, -1); + k = compile_tree(output, &tree->args[1], discard_result, -1); + if (discard_result) { + if (j < registers) used_regs &= ~(1 << j); + if (k < registers) used_regs &= ~(1 << k); + break; + } reg = j; if ((out_reg == -1 || out_reg >= registers) && j >= registers) { reg = get_free_reg(1); @@ -754,6 +1015,7 @@ int compile_tree(FILE *output, struct expression *tree, int discard_result, int used_regs &= ~(1 << k); } break; + */ case divide: if (discard_result) { j = compile_tree(output, tree->args, 1, -1); @@ -793,6 +1055,7 @@ int compile_tree(FILE *output, struct expression *tree, int discard_result, int j = compile_tree(output, tree->args, 0, 0); machine_code[mc_ptr++] = 0x48; machine_code[mc_ptr++] = 0x99; + used_regs |= (1 << 0) | (1 << 2); k = compile_tree(output, &tree->args[1], 0, -1); machine_code[mc_ptr++] = 0x48; @@ -912,7 +1175,61 @@ int compile_tree(FILE *output, struct expression *tree, int discard_result, int machine_code[mc_ptr++] = -regs[k].rbp; } else { machine_code[mc_ptr-2] |= k > 7 ? 
1 : 0; - machine_code[mc_ptr++] = 0xC0 + (reg % 8) * 8 + k; + machine_code[mc_ptr++] = 0xC0 + (reg % 8) * 8 + (k % 8); + } + if (k < registers) used_regs &= ~(1 << k); + break; + case b_or: + j = compile_tree(output, tree->args, discard_result, out_reg); + k = compile_tree(output, &tree->args[1], discard_result, -1); + if (discard_result) { + if (j < registers) used_regs &= ~(1 << j); + if (k < registers) used_regs &= ~(1 << k); + break; + } + reg = j; + if (reg >= registers) { + reg = get_free_reg(1); + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0x45 + (reg % 8) * 8; + machine_code[mc_ptr++] = -regs[j].rbp; + } + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x0b; + if (tree->args[1].type == idnt) { + machine_code[mc_ptr++] = 0x45 + (reg % 8) * 8; + machine_code[mc_ptr++] = -regs[k].rbp; + } else { + machine_code[mc_ptr-2] |= k > 7 ? 1 : 0; + machine_code[mc_ptr++] = 0xC0 + (reg % 8) * 8 + (k % 8); + } + if (k < registers) used_regs &= ~(1 << k); + break; + case b_xor: + j = compile_tree(output, tree->args, discard_result, out_reg); + k = compile_tree(output, &tree->args[1], discard_result, -1); + if (discard_result) { + if (j < registers) used_regs &= ~(1 << j); + if (k < registers) used_regs &= ~(1 << k); + break; + } + reg = j; + if (reg >= registers) { + reg = get_free_reg(1); + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0x45 + (reg % 8) * 8; + machine_code[mc_ptr++] = -regs[j].rbp; + } + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x33; + if (tree->args[1].type == idnt) { + machine_code[mc_ptr++] = 0x45 + (reg % 8) * 8; + machine_code[mc_ptr++] = -regs[k].rbp; + } else { + machine_code[mc_ptr-2] |= k > 7 ? 
1 : 0; + machine_code[mc_ptr++] = 0xC0 + (reg % 8) * 8 + (k % 8); } if (k < registers) used_regs &= ~(1 << k); break; @@ -924,9 +1241,60 @@ int compile_tree(FILE *output, struct expression *tree, int discard_result, int regs[reg].var = tree->args->str; break; case set: + /* reg = compile_tree(output, tree->args, 0, -1); j = compile_tree(output, &tree->args[1], 0, reg); break; + case set: + */ + if (tree->args->type == deref) + reg = compile_tree(output, tree->args->args, 0, -1); + else + reg = compile_tree(output, tree->args, 0, -1); + if (reg >= registers && tree->args->type != deref) { + j = compile_tree(output, &tree->args[1], 0, reg); + break; + } + j = compile_tree(output, &tree->args[1], 0, -1); + if (j < 0) { + fprintf(stderr, "No rvalue for assignment\n"); + exit(1); + } + if (j >= registers) { + k = get_free_reg(1); + machine_code[mc_ptr++] = 0x48 | (k > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0x45 + (k % 8) * 8; + machine_code[mc_ptr++] = -regs[j].rbp; + } else { + k = j; + } + if (tree->args->type == deref) { + if (reg >= registers) { + j = get_free_reg(1); + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x8b; + machine_code[mc_ptr++] = 0x45 + (j % 8) * 8; + machine_code[mc_ptr++] = -regs[reg].rbp; + reg = j; + } + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 1 : 0) | (k > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x89; + machine_code[mc_ptr++] = 0x00 + (reg % 8) + (k % 8) * 8; + } else if (reg >= registers) { + machine_code[mc_ptr++] = 0x48 | (k > 7 ? 
4 : 0); + machine_code[mc_ptr++] = 0x89; + machine_code[mc_ptr++] = 0x45 + (k % 8) * 8; + machine_code[mc_ptr++] = -regs[reg].rbp; + } + if (discard_result) { + used_regs &= ~(1 << k); + used_regs &= ~(1 << reg); + reg = -1; + break; + } + reg = k; + break; case ref: if (tree->args->type != idnt) { fprintf(stderr, "Unable to reference non-identifier\n"); @@ -971,6 +1339,7 @@ int compile_tree(FILE *output, struct expression *tree, int discard_result, int fprintf(stderr, "Unable to locate identifier <%s>\n", tree->str); exit(1); } + /* for (i = 0; i < registers; i++) { if (reg_hist[i].type & 1 && reg_hist[i].reg == reg && reg_hist[i].rev == regs[reg].rev) { reg = i; @@ -978,39 +1347,73 @@ int compile_tree(FILE *output, struct expression *tree, int discard_result, int break; } } + */ break; case number: if (discard_result) break; + /* for (i = 0; i < registers; i++) { if (reg_hist[i].type & 2 && reg_hist[i].val == tree->i) { reg = i; break; } } - if (reg < 0) { + */ + if (out_reg < 0 || out_reg >= registers) + reg = get_free_reg(1); + else reg = out_reg; - if (reg == -1 || reg >= registers) - reg = get_free_reg(0); - if (tree->i) { + if (tree->i) { + if (tree->i == 1) { + if (reg > 7) + machine_code[mc_ptr++] = 0x41; + machine_code[mc_ptr++] = 0x33; + machine_code[mc_ptr++] = 0xc0 + (reg % 8) + (reg % 8) * 8; + if (reg > 7) + machine_code[mc_ptr++] = 0x41; + machine_code[mc_ptr++] = 0xff; + machine_code[mc_ptr++] = 0xc0 + (reg % 8); + } else if (tree->i == -1) { + if (reg > 7) + machine_code[mc_ptr++] = 0x41; + machine_code[mc_ptr++] = 0x33; + machine_code[mc_ptr++] = 0xc0 + (reg % 8) + (reg % 8) * 8; + + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 
1 : 0); + machine_code[mc_ptr++] = 0xff; + machine_code[mc_ptr++] = 0xc8 + (reg % 8); + } else if (tree->i <= 0x7FFFFFFF && tree->i > 0) { + if (reg > 7) + machine_code[mc_ptr++] = 0x41; + machine_code[mc_ptr++] = 0xb8 + (reg % 8); + *(int32_t *) &machine_code[mc_ptr] = (int) tree->i; + mc_ptr += 4; + } else if (tree->i < 0 && -tree->i <= 0x7FFFFFFF) { + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0xc7; + machine_code[mc_ptr++] = 0xc0 + (reg % 8); + *(int32_t *) &machine_code[mc_ptr] = (int) tree->i; + mc_ptr += 4; + } else { machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 1 : 0); machine_code[mc_ptr++] = 0xb8 + (reg % 8); *(int64_t *) &machine_code[mc_ptr] = tree->i; mc_ptr += 8; - } else { - machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 5 : 0); - machine_code[mc_ptr++] = 0x33; - machine_code[mc_ptr++] = 0xc0 + (reg % 8) + (reg % 8) * 8; } - reg_hist[reg].type = 2; - reg_hist[reg].val = tree->i; + } else { + if (reg > 7) + machine_code[mc_ptr++] = 0x45; + machine_code[mc_ptr++] = 0x33; + machine_code[mc_ptr++] = 0xc0 + (reg % 8) + (reg % 8) * 8; } - used_regs |= (1 << reg); + reg_hist[reg].type = 2; + reg_hist[reg].val = tree->i; break; case string: if (discard_result) break; reg = out_reg; if (reg == -1 || reg >= registers) - reg = get_free_reg(0); + reg = get_free_reg(1); if (!tree->i) { strcpy(&rodata[rd_ptr], tree->str); tree->i = rodata_addr + rd_ptr; @@ -1020,50 +1423,255 @@ int compile_tree(FILE *output, struct expression *tree, int discard_result, int machine_code[mc_ptr++] = 0xb8 + (reg % 8); *(int64_t *) &machine_code[mc_ptr] = tree->i; mc_ptr += 8; - used_regs |= (1 << reg); break; case loop: memset(reg_hist, 0, sizeof(reg_hist)); machine_code[mc_ptr++] = 0xe9; - k = mc_ptr; + l = mc_ptr; mc_ptr += 4; compile_tree(output, &tree->args[1], 1, -1); - *(int32_t *) &machine_code[k] = mc_ptr - k - 4; - j = compile_tree(output, tree->args, 0, -1); - if (j >= registers) { - machine_code[mc_ptr++] = 0x48; - machine_code[mc_ptr++] = 
0x83; - machine_code[mc_ptr++] = 0x7d; - machine_code[mc_ptr++] = -regs[j].rbp; - } else { - machine_code[mc_ptr++] = 0x48 | (j > 7 ? 1 : 0); - machine_code[mc_ptr++] = 0x83; - machine_code[mc_ptr++] = 0xf8 + (j % 8); + *(int32_t *) &machine_code[l] = mc_ptr - l - 4; + switch (tree->args->type) { + case l_not: + j = compile_tree(output, tree->args->args, 0, -1); + if (j >= registers) { + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x83; + machine_code[mc_ptr++] = 0x7d; + machine_code[mc_ptr++] = -regs[j].rbp; + } else { + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x83; + machine_code[mc_ptr++] = 0xf8 + (j % 8); + } + machine_code[mc_ptr++] = 0x00; + machine_code[mc_ptr++] = 0x0f; + machine_code[mc_ptr++] = 0x84; + *(int32_t *) &machine_code[mc_ptr] = -(mc_ptr - l); + mc_ptr += 4; + break; + case l_eq: + j = compile_tree(output, tree->args->args, 0, -1); + k = compile_tree(output, &tree->args->args[1], 0, -1); + if (j >= registers) { + if (k >= registers) { + k = compile_tree(output, &tree->args->args[1], 0, get_free_reg(0)); + } + machine_code[mc_ptr++] = 0x48 | (k > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x39; + machine_code[mc_ptr++] = 0x45 + (k % 8) * 8; + machine_code[mc_ptr++] = -regs[j].rbp; + } else { + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x3b; + if (k >= registers) { + machine_code[mc_ptr++] = 0x45 + (j % 8) * 8; + machine_code[mc_ptr++] = -regs[k].rbp; + } else { + machine_code[mc_ptr-2] |= (k > 7 ? 
1 : 0); + machine_code[mc_ptr++] = 0xc0 + (k % 8) + (j % 8) * 8; + } + } + machine_code[mc_ptr++] = 0x0f; + machine_code[mc_ptr++] = 0x84; + *(int32_t *) &machine_code[mc_ptr] = -(mc_ptr - l); + mc_ptr += 4; + break; + case l_lt: + j = compile_tree(output, tree->args->args, 0, -1); + k = compile_tree(output, &tree->args->args[1], 0, -1); + if (j >= registers) { + if (k >= registers) { + k = compile_tree(output, &tree->args->args[1], 0, get_free_reg(0)); + } + machine_code[mc_ptr++] = 0x48 | (k > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x39; + machine_code[mc_ptr++] = 0x45 + (k % 8) * 8; + machine_code[mc_ptr++] = -regs[j].rbp; + } else { + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x3b; + if (k >= registers) { + machine_code[mc_ptr++] = 0x45 + (j % 8) * 8; + machine_code[mc_ptr++] = -regs[k].rbp; + } else { + machine_code[mc_ptr-2] |= (k > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0xc0 + (k % 8) + (j % 8) * 8; + } + } + machine_code[mc_ptr++] = 0x0f; + machine_code[mc_ptr++] = 0x8c; + *(int32_t *) &machine_code[mc_ptr] = -(mc_ptr - l); + mc_ptr += 4; + break; + default: + j = compile_tree(output, tree->args, 0, -1); + if (j >= registers) { + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x83; + machine_code[mc_ptr++] = 0x7d; + machine_code[mc_ptr++] = -regs[j].rbp; + } else { + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 
1 : 0); + machine_code[mc_ptr++] = 0x83; + machine_code[mc_ptr++] = 0xf8 + (j % 8); + } + machine_code[mc_ptr++] = 0x00; + machine_code[mc_ptr++] = 0x0f; + machine_code[mc_ptr++] = 0x85; + *(int32_t *) &machine_code[mc_ptr] = -(mc_ptr - l); + mc_ptr += 4; + break; } - machine_code[mc_ptr++] = 0x00; - machine_code[mc_ptr++] = 0x0f; - machine_code[mc_ptr++] = 0x85; - *(int32_t *) &machine_code[mc_ptr] = -(mc_ptr - k); - mc_ptr += 4; used_regs = (1 << 5) | (1 << 4); + for (i = 0; i < registers; i++) + reg_hist[i].type = 0; break; case ifnz: + /* j = compile_tree(output, tree->args, 0, -1); if (j >= registers) { machine_code[mc_ptr++] = 0x48; machine_code[mc_ptr++] = 0x83; machine_code[mc_ptr++] = 0x7d; machine_code[mc_ptr++] = -regs[j].rbp; + machine_code[mc_ptr++] = 0x00; } else { machine_code[mc_ptr++] = 0x48 | (j > 7 ? 1 : 0); machine_code[mc_ptr++] = 0x83; machine_code[mc_ptr++] = 0xf8 + (j % 8); + machine_code[mc_ptr++] = 0x00; } - machine_code[mc_ptr++] = 0x00; + //machine_code[mc_ptr++] = 0x00; machine_code[mc_ptr++] = 0x0f; machine_code[mc_ptr++] = 0x84; k = mc_ptr; mc_ptr += 4; + */ + switch (tree->args->type) { + case l_not: + j = compile_tree(output, tree->args->args, 0, -1); + if (j >= registers) { + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x83; + machine_code[mc_ptr++] = 0x7d; + machine_code[mc_ptr++] = -regs[j].rbp; + } else { + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x83; + machine_code[mc_ptr++] = 0xf8 + (j % 8); + } + machine_code[mc_ptr++] = 0x00; + machine_code[mc_ptr++] = 0x0f; + machine_code[mc_ptr++] = 0x85; + //*(int32_t *) &machine_code[mc_ptr] = -(mc_ptr - l); + k = mc_ptr; + mc_ptr += 4; + break; + case l_eq: + j = compile_tree(output, tree->args->args, 0, -1); + k = compile_tree(output, &tree->args->args[1], 0, -1); + if (j >= registers) { + if (k >= registers) { + k = compile_tree(output, &tree->args->args[1], 0, get_free_reg(0)); + } + machine_code[mc_ptr++] = 0x48 | (k > 7 ? 
4 : 0); + machine_code[mc_ptr++] = 0x39; + machine_code[mc_ptr++] = 0x45 + (k % 8) * 8; + machine_code[mc_ptr++] = -regs[j].rbp; + } else { + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x3b; + if (k >= registers) { + machine_code[mc_ptr++] = 0x45 + (j % 8) * 8; + machine_code[mc_ptr++] = -regs[k].rbp; + } else { + machine_code[mc_ptr-2] |= (k > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0xc0 + (k % 8) + (j % 8) * 8; + } + } + machine_code[mc_ptr++] = 0x0f; + machine_code[mc_ptr++] = 0x85; + //*(int32_t *) &machine_code[mc_ptr] = -(mc_ptr - l); + k = mc_ptr; + mc_ptr += 4; + break; + case l_lt: + j = compile_tree(output, tree->args->args, 0, -1); + k = compile_tree(output, &tree->args->args[1], 0, -1); + if (j >= registers) { + if (k >= registers) { + k = compile_tree(output, &tree->args->args[1], 0, get_free_reg(0)); + } + machine_code[mc_ptr++] = 0x48 | (k > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x39; + machine_code[mc_ptr++] = 0x45 + (k % 8) * 8; + machine_code[mc_ptr++] = -regs[j].rbp; + } else { + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x3b; + if (k >= registers) { + machine_code[mc_ptr++] = 0x45 + (j % 8) * 8; + machine_code[mc_ptr++] = -regs[k].rbp; + } else { + machine_code[mc_ptr-2] |= (k > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0xc0 + (k % 8) + (j % 8) * 8; + } + } + machine_code[mc_ptr++] = 0x0f; + machine_code[mc_ptr++] = 0x8d; + //*(int32_t *) &machine_code[mc_ptr] = -(mc_ptr - l); + k = mc_ptr; + mc_ptr += 4; + break; + case b_and: + j = compile_tree(output, tree->args->args, 0, -1); + k = compile_tree(output, &tree->args->args[1], 0, -1); + if (j >= registers) { + if (k >= registers) { + k = compile_tree(output, &tree->args->args[1], 0, get_free_reg(0)); + } + machine_code[mc_ptr++] = 0x48 | (k > 7 ? 4 : 0); + machine_code[mc_ptr++] = 0x85; + machine_code[mc_ptr++] = 0x45 + (k % 8) * 8; + machine_code[mc_ptr++] = -regs[j].rbp; + } else { + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 
4 : 0); + machine_code[mc_ptr++] = 0x85; + if (k >= registers) { + machine_code[mc_ptr++] = 0x45 + (j % 8) * 8; + machine_code[mc_ptr++] = -regs[k].rbp; + } else { + machine_code[mc_ptr-2] |= (k > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0xc0 + (k % 8) + (j % 8) * 8; + } + } + machine_code[mc_ptr++] = 0x0f; + machine_code[mc_ptr++] = 0x84; + //*(int32_t *) &machine_code[mc_ptr] = -(mc_ptr - l); + k = mc_ptr; + mc_ptr += 4; + break; + default: + j = compile_tree(output, tree->args, 0, -1); + if (j >= registers) { + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x83; + machine_code[mc_ptr++] = 0x7d; + machine_code[mc_ptr++] = -regs[j].rbp; + } else { + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x83; + machine_code[mc_ptr++] = 0xf8 + (j % 8); + } + machine_code[mc_ptr++] = 0x00; + machine_code[mc_ptr++] = 0x0f; + machine_code[mc_ptr++] = 0x84; + //*(int32_t *) &machine_code[mc_ptr] = -(mc_ptr - l); + k = mc_ptr; + mc_ptr += 4; + break; + } // True used_regs = (1 << 5) | (1 << 4); compile_tree(output, &tree->args[1], 1, -1); @@ -1080,6 +1688,8 @@ int compile_tree(FILE *output, struct expression *tree, int discard_result, int *(int32_t *) &machine_code[k] = mc_ptr - k - 4; } used_regs = (1 << 5) | (1 << 4); + for (i = 0; i < registers; i++) + reg_hist[i].type = 0; break; case l_not: j = compile_tree(output, tree->args, discard_result, out_reg); @@ -1114,6 +1724,96 @@ int compile_tree(FILE *output, struct expression *tree, int discard_result, int *(int64_t *) &machine_code[mc_ptr] = 1; mc_ptr += 8; break; + case l_and: + if (tree->args->type == number) + k = tree->args->i, l = 1; + else + l = 0; + if (l) { + if (k) + reg = compile_tree(output, &tree->args[1], discard_result, out_reg); + else + reg = compile_tree(output, tree->args, discard_result, out_reg); + break; + } + j = compile_tree(output, tree->args, discard_result, out_reg); + reg = j; + if (j >= registers) { + reg = get_free_reg(1); + machine_code[mc_ptr++] = 0x48; + 
machine_code[mc_ptr++] = 0x83; + machine_code[mc_ptr++] = 0x7d; + machine_code[mc_ptr++] = -regs[j].rbp; + } else { + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x83; + machine_code[mc_ptr++] = 0xf8 + (reg % 8); + } + machine_code[mc_ptr++] = 0x00; + + machine_code[mc_ptr++] = 0x0f; + machine_code[mc_ptr++] = 0x84; + //machine_code[mc_ptr++] = 0x05; + l = mc_ptr; + mc_ptr += 4; + // Not zero + compile_tree(output, &tree->args[1], discard_result, reg); + if (discard_result) { + *(int32_t *) &machine_code[l] = mc_ptr - l; + if (reg < registers) used_regs &= ~(1 << reg); + break; + } + machine_code[mc_ptr++] = 0xeb; + machine_code[mc_ptr++] = 0x03; + *(int32_t *) &machine_code[l] = mc_ptr - l - 4; + // Zero + machine_code[mc_ptr++] = 0x48 + (reg > 7 ? 5 : 0); + machine_code[mc_ptr++] = 0x33; + machine_code[mc_ptr++] = 0xc0 + (reg % 8) * 8 + (reg % 8); + break; + case l_or: + if (has_sideeffect(tree->args) || has_sideeffect(&tree->args[1]) || + !discard_result) { + if (tree->args->type == number) + k = tree->args->i, l = 1; + else + l = 0; + if (l) { + if (k) + reg = compile_tree(output, tree->args, discard_result, out_reg); + else + reg = compile_tree(output, &tree->args[1], discard_result, out_reg); + break; + } + j = compile_tree(output, tree->args, 0, out_reg); + reg = j; + if (j >= registers) { + reg = get_free_reg(1); + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0x83; + machine_code[mc_ptr++] = 0x7d; + machine_code[mc_ptr++] = -regs[j].rbp; + } else { + machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 
1 : 0); + machine_code[mc_ptr++] = 0x83; + machine_code[mc_ptr++] = 0xf8 + (reg % 8); + } + machine_code[mc_ptr++] = 0x00; + } + + if (has_sideeffect(&tree->args[1]) || !discard_result) { + machine_code[mc_ptr++] = 0x0f; + machine_code[mc_ptr++] = 0x85; + l = mc_ptr; + mc_ptr += 4; + compile_tree(output, &tree->args[1], discard_result, reg); + *(int32_t *) &machine_code[l] = mc_ptr - l - 4; + } + if (discard_result) { + if (reg >= 0 && reg < registers) used_regs &= ~(1 << reg); + break; + } + break; case l_eq: j = compile_tree(output, tree->args, discard_result, -1); k = compile_tree(output, &tree->args[1], discard_result, -1); @@ -1138,7 +1838,7 @@ int compile_tree(FILE *output, struct expression *tree, int discard_result, int machine_code[mc_ptr++] = 0x45 + (k % 8) * 8; machine_code[mc_ptr++] = -regs[j].rbp; } else if (k >= registers) { - machine_code[mc_ptr++] = 0x48 | (j > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 4 : 0); machine_code[mc_ptr++] = 0x3b; machine_code[mc_ptr++] = 0x45 + (j % 8) * 8; machine_code[mc_ptr++] = -regs[k].rbp; @@ -1148,6 +1848,7 @@ int compile_tree(FILE *output, struct expression *tree, int discard_result, int machine_code[mc_ptr++] = 0xc0 + (j % 8) + (k % 8) * 8; used_regs &= ~(1 << k); } + if (j < registers && k >= registers) used_regs &= ~(1 << reg), reg = j; machine_code[mc_ptr++] = 0x74; machine_code[mc_ptr++] = 0x05; //Offset // Not less than: @@ -1220,25 +1921,76 @@ int compile_tree(FILE *output, struct expression *tree, int discard_result, int machine_code[mc_ptr++] = 0xc9; machine_code[mc_ptr++] = 0xc3; return -3; + case _syscall: + push(0, &pushed, push_order, "readstring (rax)"); + if (tree->args[1].type != comma) { + push(syscall_arg[0], &pushed, push_order, "readstring (syscall_arg 0)"); + compile_tree(output, &tree->args->args[0], 0, syscall_arg[0]); + i = 1; + } else { + for (i = 0; i < tree->args[1].args_count; i++) { + push(syscall_arg[i], &pushed, push_order, "readstring (syscall_arg loop)"); + 
compile_tree(output, &tree->args[1].args[i], 0, syscall_arg[i]); + } + } + + compile_tree(output, tree->args, 0, 0); + + machine_code[mc_ptr++] = 0x0f; + machine_code[mc_ptr++] = 0x05; + for (j = 0; j < i; j++) + used_regs &= ~(1 << syscall_arg[j]); + reg = 0; + break; + case readstring: + push(0, &pushed, push_order, "readstring (rax)"); + push(syscall_arg[0], &pushed, push_order, "readstring (syscall_arg 0)"); + push(syscall_arg[1], &pushed, push_order, "readstring (syscall_arg 1)"); + push(syscall_arg[2], &pushed, push_order, "readstring (syscall_arg 2)"); + compile_tree(output, &tree->args->args[0], 0, syscall_arg[0]); + compile_tree(output, &tree->args->args[1], 0, syscall_arg[1]); + compile_tree(output, &tree->args->args[2], 0, syscall_arg[2]); + + compile_tree(output, ZERO, 0, 0); + /* + machine_code[mc_ptr++] = 0x48; + machine_code[mc_ptr++] = 0xb8; + *(int64_t *) &machine_code[mc_ptr] = 0x00; + mc_ptr += 8; + */ + + machine_code[mc_ptr++] = 0x0f; + machine_code[mc_ptr++] = 0x05; + used_regs &= ~(1 << syscall_arg[0]); + used_regs &= ~(1 << syscall_arg[1]); + used_regs &= ~(1 << syscall_arg[2]); + reg = 0; + break; case writestring: push(0, &pushed, push_order, "writestring (rax)"); push(syscall_arg[0], &pushed, push_order, "writestring (syscall_arg 0)"); push(syscall_arg[1], &pushed, push_order, "writestring (syscall_arg 1)"); push(syscall_arg[2], &pushed, push_order, "writestring (syscall_arg 2)"); - compile_tree(output, &tree->args[1], 0, syscall_arg[1]); - compile_tree(output, tree->args, 0, syscall_arg[2]); - + compile_tree(output, &tree->args->args[0], 0, syscall_arg[0]); + used_regs |= (1 << syscall_arg[0]); + compile_tree(output, &tree->args->args[1], 0, syscall_arg[1]); + used_regs |= (1 << syscall_arg[1]); + compile_tree(output, &tree->args->args[2], 0, syscall_arg[2]); + used_regs |= (1 << syscall_arg[2]); + + compile_tree(output, ONE, 0, 0); + /* machine_code[mc_ptr++] = 0x48; machine_code[mc_ptr++] = 0xb8; *(int64_t *) &machine_code[mc_ptr] = 
0x01; mc_ptr += 8; + */ - machine_code[mc_ptr++] = 0x48 | (syscall_arg[0] > 7 ? 1 : 0); - machine_code[mc_ptr++] = 0x89; - machine_code[mc_ptr++] = 0xc0 + (syscall_arg[0] % 8); - - machine_code[mc_ptr++] = 0x0F; + machine_code[mc_ptr++] = 0x0f; machine_code[mc_ptr++] = 0x05; + used_regs &= ~(1 << syscall_arg[0]); + used_regs &= ~(1 << syscall_arg[1]); + used_regs &= ~(1 << syscall_arg[2]); reg = 0; break; case printchar: @@ -1261,15 +2013,19 @@ int compile_tree(FILE *output, struct expression *tree, int discard_result, int machine_code[mc_ptr++] = 0x45 + (j % 8) * 8; machine_code[mc_ptr++] = -next_rbp; - machine_code[mc_ptr++] = 0x48 | (j > 7 ? 1 : 0); + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 4 : 0); machine_code[mc_ptr++] = 0x8d; machine_code[mc_ptr++] = 0x45 + (j % 8) * 8; machine_code[mc_ptr++] = -next_rbp; + + compile_tree(output, ONE, 0, 0); + /* machine_code[mc_ptr++] = 0x48; machine_code[mc_ptr++] = 0xb8; *(int64_t *) &machine_code[mc_ptr] = 0x01; mc_ptr += 8; + */ machine_code[mc_ptr++] = 0x48 | (arg_order[0] > 7 ? 
1 : 0); machine_code[mc_ptr++] = 0x89; machine_code[mc_ptr++] = 0xc0 + (arg_order[0] % 8); @@ -1284,15 +2040,29 @@ int compile_tree(FILE *output, struct expression *tree, int discard_result, int machine_code[mc_ptr++] = 0x83; machine_code[mc_ptr++] = 0xc4; machine_code[mc_ptr++] = 0x08; + + used_regs &= ~(1 << syscall_arg[0]); + used_regs &= ~(1 << syscall_arg[1]); + used_regs &= ~(1 << syscall_arg[2]); + reg = 0; + break; + case brk_sys: + push(0, &pushed, push_order, "brk_sys (rax)"); + push(syscall_arg[0], &pushed, push_order, "brk_sys (syscall_arg 0)"); + j = compile_tree(output, tree->args, 0, syscall_arg[0]); + machine_code[mc_ptr++] = 0xb8; + *(int32_t *) &machine_code[mc_ptr] = 0x0c; + mc_ptr += 4; + machine_code[mc_ptr++] = 0x0f; + machine_code[mc_ptr++] = 0x05; reg = 0; break; case exit_p: j = compile_tree(output, tree->args, 0, syscall_arg[0]); - machine_code[mc_ptr++] = 0x48; machine_code[mc_ptr++] = 0xb8; - *(int64_t *) &machine_code[mc_ptr] = 0x3c; - mc_ptr += 8; - machine_code[mc_ptr++] = 0x0F; + *(int32_t *) &machine_code[mc_ptr] = 0x3c; + mc_ptr += 4; + machine_code[mc_ptr++] = 0x0f; machine_code[mc_ptr++] = 0x05; return -2; } @@ -1302,26 +2072,27 @@ int compile_tree(FILE *output, struct expression *tree, int discard_result, int fprintf(stderr, "type: %s\n", exp_str[tree->type]); exit(1); } - if (out_reg < 0 && !(pushed & (1 << reg))) { + if (out_reg == reg || out_reg < 0 && !(pushed & (1 << reg))) { out_reg = reg; + used_regs |= (1 << out_reg); goto pop_used; } else if (out_reg < 0) { k = used_regs; used_regs |= pushed; out_reg = get_free_reg(1); used_regs = k; + used_regs |= (1 << out_reg); } - if (reg == out_reg) goto pop_used; if (reg >= registers && out_reg >= registers) { j = get_free_reg(0); machine_code[mc_ptr++] = 0x48 | (j > 7 ? 4 : 0); machine_code[mc_ptr++] = 0x8b; machine_code[mc_ptr++] = 0x45 + (j % 8) * 8; machine_code[mc_ptr++] = -regs[reg].rbp; - reg = j; - machine_code[mc_ptr++] = 0x48 | (reg > 7 ? 
4 : 0); + + machine_code[mc_ptr++] = 0x48 | (j > 7 ? 4 : 0); machine_code[mc_ptr++] = 0x89; - machine_code[mc_ptr++] = 0x45 + (reg % 8) * 8; + machine_code[mc_ptr++] = 0x45 + (j % 8) * 8; machine_code[mc_ptr++] = -regs[out_reg].rbp; regs[out_reg].rev++; } else if (reg >= registers && out_reg < registers) { @@ -1345,6 +2116,7 @@ int compile_tree(FILE *output, struct expression *tree, int discard_result, int machine_code[mc_ptr-2] |= (out_reg > 7 ? 1 : 0); machine_code[mc_ptr++] = 0xc0 + (out_reg % 8) + (reg % 8) * 8; memcpy(&reg_hist[out_reg], &reg_hist[reg], sizeof(struct reg_history)); + used_regs |= (1 << out_reg); } used_regs &= ~(1 << reg); } @@ -1418,8 +2190,17 @@ char padding[4096] = { 0 }; int main(int argc, char **argv) { - int i, outfd, use_rodata = 1; + int failed = 0, i, outfd, use_rodata = 1, flag_s = 0, first_arg = 1; + if (argc > 1 && argv[1][0] == '-' && argv[1][1]) { + switch (argv[1][1]) { + case 's': + flag_s = 1; + break; + } + argc--; + first_arg = 2; + } if (argc == 1) { fname = "stdin"; outfd = 1; @@ -1427,9 +2208,16 @@ int main(int argc, char **argv) fprintf(stderr, "Too few arguments\nUsage: %s <in-file> <out-file>\n", *argv); exit(1); } else { - fname = argv[1]; - yyin = fopen(fname, "r"); - outfd = open(argv[2], O_TRUNC | O_WRONLY | O_CREAT, (S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH) & 0x1ff); + if (argv[first_arg][0] == '-' && !argv[first_arg][1]) { + fname = "stdin"; + yyin = stdin; + } else { + fname = argv[first_arg]; + yyin = fopen(fname, "r"); + } + outfd = open(argv[first_arg + 1], O_TRUNC | O_WRONLY | O_CREAT, + (S_IRUSR | S_IWUSR | S_IXUSR | S_IRGRP | + S_IXGRP | S_IROTH | S_IXOTH) & 0x1ff); if (outfd == -1) { fprintf(stderr, "Unable to open out-file\n"); exit(1); @@ -1438,7 +2226,7 @@ int main(int argc, char **argv) yyparse(); - //stringify(final, -3, 0, NULL, 0); + if (flag_s) stringify(final, -3, 0, NULL, 0); if (use_rodata) elf_hdr.phcount = 2; else elf_hdr.phcount = 1; @@ -1453,6 +2241,13 @@ int 
main(int argc, char **argv) for (i = 0; i < final->args_count; i++) { compile_tree(stdout, &final->args[i], 1, -1); } + for (i = 0; i < ftemp_len; i++) { + if (ftemp[i].fptr) { + fprintf(stderr, "No definition for function <%s> found\n", ftemp[i].fname); + + } + } + if (failed) exit(1); if (entry_addr < 0) { fprintf(stderr, "No _start symbol found!\n"); exit(1);