Skip to content

Commit

Permalink
Improved stack usage calculation and recycling parameters
Browse files Browse the repository at this point in the history
  • Loading branch information
RealNeGate committed Dec 21, 2022
1 parent a1c9130 commit 7a33c64
Show file tree
Hide file tree
Showing 4 changed files with 22 additions and 25 deletions.
32 changes: 15 additions & 17 deletions src/tb/codegen/generic_addrdesc.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,7 +181,7 @@ static void GAD_FN(spill)(Ctx* restrict ctx, TB_Function* f, GAD_VAL* dst_val, G
static void GAD_FN(goto)(Ctx* restrict ctx, TB_Label l);
static void GAD_FN(ret_jmp)(Ctx* restrict ctx);
static void GAD_FN(initial_reg_alloc)(Ctx* restrict ctx);
static size_t GAD_FN(resolve_params)(Ctx* restrict ctx, TB_Function* f, GAD_VAL* values);
static void GAD_FN(resolve_params)(Ctx* restrict ctx, TB_Function* f, GAD_VAL* values);
static GAD_VAL GAD_FN(eval)(Ctx* restrict ctx, TB_Function* f, TB_Reg r);
static void GAD_FN(resolve_stack_slot)(Ctx* restrict ctx, TB_Function* f, TB_Node* restrict n);
static void GAD_FN(return)(Ctx* restrict ctx, TB_Function* f, TB_Node* restrict n);
Expand Down Expand Up @@ -367,15 +367,19 @@ static void GAD_FN(regalloc_step)(Ctx* restrict ctx, TB_Function* f, TB_Reg r) {
} else {
bool success = false;
if (ra.can_recycle && ctx->active_count > 0) {
TB_Reg last = ctx->active[ctx->active_count - 1];
LiveInterval last_li = get_live_interval(ctx, last);

if (last_li.end == r_li.start) {
printf(" recycle r%u for r%u\n", last, r);

ctx->values[r] = ctx->values[last];
ctx->active[ctx->active_count - 1] = r;
success = true;
printf(" try recycling r%u\n", r);
FOREACH_N(k, 0, ctx->active_count) {
TB_Reg other_r = ctx->active[k];
LiveInterval other_li = get_live_interval(ctx, other_r);

printf(" [%zu] = r%u\n", k, other_r);
if (other_li.end == r_li.start) {
printf(" recycle r%u for r%u\n", other_r, r);

ctx->values[r] = ctx->values[other_r];
ctx->active[k] = r;
success = true;
}
}
}

Expand Down Expand Up @@ -599,11 +603,9 @@ static TB_FunctionOutput GAD_FN(compile_function)(TB_Function* restrict f, const
GAD_FN(initial_reg_alloc)(ctx);

ctx->active = tb_platform_heap_alloc(f->node_count * sizeof(TB_Reg));
ctx->active_count = GAD_FN(resolve_params)(ctx, f, ctx->values);
GAD_FN(resolve_params)(ctx, f, ctx->values);
}

size_t original_stack_usage = ctx->stack_usage;

// calculate the order of the nodes; this helps because node indices
// don't actually tell us the ordering, especially once the optimizer
// has taken a jab at it.
Expand Down Expand Up @@ -653,10 +655,6 @@ static TB_FunctionOutput GAD_FN(compile_function)(TB_Function* restrict f, const
f->lines[0].pos = 0;
}

if (ctx->stack_usage == original_stack_usage) {
ctx->stack_usage = 0;
}

// we're done, clean up
TB_FunctionOutput func_out = {
.linkage = f->linkage,
Expand Down
2 changes: 1 addition & 1 deletion src/tb/tb.c
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ static tb_atomic_int total_tid;

ICodeGen* tb__find_code_generator(TB_Module* m) {
switch (m->target_arch) {
#if 0
#if 1
case TB_ARCH_X86_64: return &tb__x64v2_codegen;
#else
case TB_ARCH_X86_64: return &tb__x64_codegen;
Expand Down
7 changes: 2 additions & 5 deletions src/tb/x64/x64_new.c
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ static size_t x64v2_resolve_stack_usage(Ctx* restrict ctx, TB_Function* f, size_
size_t usage = stack_usage + (caller_usage * 8);

// Align stack usage to 16 bytes and add 8 bytes for the return address
if (usage > 0) {
if (usage > 16) {
usage = align_up(usage + 8, 16) + 8;
} else {
usage = 8;
Expand All @@ -198,9 +198,8 @@ static void x64v2_resolve_local_patches(Ctx* restrict ctx, TB_Function* f) {
}
}

static size_t x64v2_resolve_params(Ctx* restrict ctx, TB_Function* f, GAD_VAL* values) {
static void x64v2_resolve_params(Ctx* restrict ctx, TB_Function* f, GAD_VAL* values) {
bool is_sysv = (f->super.module->target_abi == TB_ABI_SYSTEMV);
size_t active_count = 0;
const TB_FunctionPrototype* restrict proto = f->prototype;

size_t param_count = proto->param_count;
Expand Down Expand Up @@ -271,8 +270,6 @@ static size_t x64v2_resolve_params(Ctx* restrict ctx, TB_Function* f, GAD_VAL* v
INST2(MOV, &dst, &src, TB_TYPE_I64);
}
}

return active_count;
}

static void x64v2_resolve_stack_slot(Ctx* restrict ctx, TB_Function* f, TB_Node* restrict n) {
Expand Down
6 changes: 4 additions & 2 deletions src/tb/x64/x64_proepi.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ void x64_emit_win64eh_unwind_info(TB_Emitter* e, TB_FunctionOutput* out_f, uint6
}

size_t x64_emit_prologue(uint8_t* out, uint64_t saved, uint64_t stack_usage) {
if ((tb_popcount(saved & 0xFFFF) & 1) == 0) stack_usage += 8;
// align the stack correctly
if (saved && (tb_popcount(saved & 0xFFFF) & 1) == 0) stack_usage += 8;
// If the stack usage is only the baseline 8 bytes (presumably the return address slot), we don't need a prologue
if (stack_usage == 8) return 0;

Expand Down Expand Up @@ -96,7 +97,8 @@ size_t x64_emit_prologue(uint8_t* out, uint64_t saved, uint64_t stack_usage) {
}

size_t x64_emit_epilogue(uint8_t* out, uint64_t saved, uint64_t stack_usage) {
if ((tb_popcount(saved & 0xFFFF) & 1) == 0) stack_usage += 8;
// align the stack correctly
if (saved && (tb_popcount(saved & 0xFFFF) & 1) == 0) stack_usage += 8;

// if the stack isn't used then just return
if (stack_usage == 8) {
Expand Down

0 comments on commit 7a33c64

Please sign in to comment.