diff --git a/src/_debug/dump_ir.c b/src/_debug/dump_ir.c index a8376f309..e0312c0e3 100644 --- a/src/_debug/dump_ir.c +++ b/src/_debug/dump_ir.c @@ -147,7 +147,10 @@ static void dump_func_ir(Function *func) { char regtype = 'R'; if (vreg->vtype->flag & VRTF_FLONUM) regtype = 'F'; - fprintf(fp, " V%3d (flag=%x): live %3d - %3d, => %c%3d\n", li->virt, vreg->flag, li->start, li->end, regtype, li->phys); + fprintf(fp, " V%3d (flag=%x): live %3d - %3d, => %c%3d", li->virt, vreg->flag, li->start, li->end, regtype, li->phys); + if (li->occupied_reg_bit != 0) + fprintf(fp, ", occupied=%lx", li->occupied_reg_bit); + fprintf(fp, "\n"); } break; case LI_SPILL: diff --git a/src/cc/arch/x64/ir_x64.c b/src/cc/arch/x64/ir_x64.c index 032c840e7..cd2592463 100644 --- a/src/cc/arch/x64/ir_x64.c +++ b/src/cc/arch/x64/ir_x64.c @@ -19,31 +19,30 @@ int stackpos = 8; // Register allocator const char *kRegSizeTable[][PHYSICAL_REG_MAX] = { - {R9B, R8B, CL, DIL, SIL, BL, R12B, R13B, R14B, R15B, R10B, R11B}, - {R9W, R8W, CX, DI, SI, BX, R12W, R13W, R14W, R15W, R10W, R11W}, - {R9D, R8D, ECX, EDI, ESI, EBX, R12D, R13D, R14D, R15D, R10D, R11D}, - { R9, R8, RCX, RDI, RSI, RBX, R12, R13, R14, R15, R10, R11}, + { AL, DIL, SIL, DL, CL, R8B, R9B, BL, R12B, R13B, R14B, R15B, R10B, R11B}, + { AX, DI, SI, DX, CX, R8W, R9W, BX, R12W, R13W, R14W, R15W, R10W, R11W}, + {EAX, EDI, ESI, EDX, ECX, R8D, R9D, EBX, R12D, R13D, R14D, R15D, R10D, R11D}, + {RAX, RDI, RSI, RDX, RCX, R8, R9, RBX, R12, R13, R14, R15, R10, R11}, }; #define CALLEE_SAVE_REG_COUNT ((int)(sizeof(kCalleeSaveRegs) / sizeof(*kCalleeSaveRegs))) -static const int kCalleeSaveRegs[] = {5, 6, 7, 8, 9}; +static const int kCalleeSaveRegs[] = {7, 8, 9, 10, 11}; #define CALLER_SAVE_REG_COUNT ((int)(sizeof(kCallerSaveRegs) / sizeof(*kCallerSaveRegs))) -static const int kCallerSaveRegs[] = {10, 11}; +static const int kCallerSaveRegs[] = {12, 13}; -const int ArchRegParamMapping[] = {3, 4, -1, 2, 1, 0}; +const int ArchRegParamMapping[] = {1, 2, 3, 4, 5, 
6}; // Return index of %rcx register. // Detect the index using the fact that %rcx is 4th parameter on calling convention. -#define GET_CREG_INDEX() ArchRegParamMapping[3] +#define GET_AREG_INDEX() 0 +#define GET_CREG_INDEX() 4 // ArchRegParamMapping[3] +#define GET_DREG_INDEX() 3 // ArchRegParamMapping[2] #define kReg8s (kRegSizeTable[0]) #define kReg32s (kRegSizeTable[2]) #define kReg64s (kRegSizeTable[3]) -static const char *kRegATable[] = {AL, AX, EAX, RAX}; -static const char *kRegDTable[] = {DL, DX, EDX, RDX}; - #define SZ_FLOAT (4) #define SZ_DOUBLE (8) const char *kFReg64s[PHYSICAL_FREG_MAX] = { @@ -53,6 +52,19 @@ const char *kFReg64s[PHYSICAL_FREG_MAX] = { #define CALLER_SAVE_FREG_COUNT ((int)(sizeof(kCallerSaveFRegs) / sizeof(*kCallerSaveFRegs))) static const int kCallerSaveFRegs[] = {8, 9, 10, 11, 12, 13, 14, 15}; +unsigned long detect_extra_occupied(IR *ir) { + unsigned long ioccupy = 0; + if (ir->kind == IR_LSHIFT || ir->kind == IR_RSHIFT) { + ioccupy = 1UL << GET_CREG_INDEX(); + } else if ((ir->kind == IR_MUL || ir->kind == IR_DIV || ir->kind == IR_MOD) && + !(ir->dst->vtype->flag & VRTF_FLONUM)) { + ioccupy = (1UL << GET_DREG_INDEX()) | (1UL << GET_AREG_INDEX()); + } else if (ir->kind == IR_TJMP) { + ioccupy = 1UL << GET_AREG_INDEX(); + } + return ioccupy; +} + // static const int kPow2Table[] = {-1, 0, 1, -1, 2, -1, -1, -1, 3}; @@ -217,32 +229,33 @@ static void ir_out(IR *ir) { break; case IR_MUL: - // Break %rdx - { - assert(!(ir->opr1->flag & VRF_CONST) && !(ir->opr2->flag & VRF_CONST)); - if (ir->dst->vtype->flag & VRTF_FLONUM) { - assert(ir->dst->phys == ir->opr1->phys); - const char **regs = kFReg64s; - switch (ir->dst->vtype->size) { - case SZ_FLOAT: MULSS(regs[ir->opr2->phys], regs[ir->dst->phys]); break; - case SZ_DOUBLE: MULSD(regs[ir->opr2->phys], regs[ir->dst->phys]); break; - default: assert(false); break; - } - break; + assert(!(ir->opr1->flag & VRF_CONST) && !(ir->opr2->flag & VRF_CONST)); + if (ir->dst->vtype->flag & VRTF_FLONUM) { + 
assert(ir->dst->phys == ir->opr1->phys); + const char **regs = kFReg64s; + switch (ir->dst->vtype->size) { + case SZ_FLOAT: MULSS(regs[ir->opr2->phys], regs[ir->dst->phys]); break; + case SZ_DOUBLE: MULSD(regs[ir->opr2->phys], regs[ir->dst->phys]); break; + default: assert(false); break; } + } else { + // Break %rax, %rdx + assert(ir->dst->phys == ir->opr1->phys); + assert(ir->opr2->phys != GET_AREG_INDEX()); assert(0 <= ir->dst->vtype->size && ir->dst->vtype->size < kPow2TableSize); int pow = kPow2Table[ir->dst->vtype->size]; assert(0 <= pow && pow < 4); const char **regs = kRegSizeTable[pow]; - const char *a = kRegATable[pow]; - MOV(regs[ir->opr1->phys], a); + const char *a = regs[GET_AREG_INDEX()]; + if (ir->opr1->phys != GET_AREG_INDEX()) + MOV(regs[ir->opr1->phys], a); MUL(regs[ir->opr2->phys]); - MOV(a, regs[ir->dst->phys]); + if (ir->dst->phys != GET_AREG_INDEX()) + MOV(a, regs[ir->dst->phys]); } break; case IR_DIV: - // Break %rdx assert(!(ir->opr1->flag & VRF_CONST) && !(ir->opr2->flag & VRF_CONST)); if (ir->dst->vtype->flag & VRTF_FLONUM) { assert(ir->dst->phys == ir->opr1->phys); @@ -252,24 +265,32 @@ static void ir_out(IR *ir) { case SZ_DOUBLE: DIVSD(regs[ir->opr2->phys], regs[ir->dst->phys]); break; default: assert(false); break; } - break; - } - if (ir->dst->vtype->size == 1) { + } else if (ir->dst->vtype->size == 1) { + assert(ir->dst->phys == ir->opr1->phys); + assert(ir->opr2->phys != GET_AREG_INDEX()); + // Break %ax if (!(ir->dst->vtype->flag & VRTF_UNSIGNED)) { - MOVSX(kReg8s[ir->opr1->phys], AX); + if (ir->opr1->phys != GET_AREG_INDEX()) + MOVSX(kReg8s[ir->opr1->phys], AX); IDIV(kReg8s[ir->opr2->phys]); } else { - MOVZX(kReg8s[ir->opr1->phys], AX); + if (ir->opr1->phys != GET_AREG_INDEX()) + MOVZX(kReg8s[ir->opr1->phys], AX); DIV(kReg8s[ir->opr2->phys]); } - MOV(AL, kReg8s[ir->dst->phys]); + if (ir->dst->phys != GET_AREG_INDEX()) + MOV(AL, kReg8s[ir->dst->phys]); } else { + assert(ir->dst->phys == ir->opr1->phys); + assert(ir->opr2->phys != 
GET_AREG_INDEX()); + // Break %rax, %rdx assert(0 <= ir->dst->vtype->size && ir->dst->vtype->size < kPow2TableSize); int pow = kPow2Table[ir->dst->vtype->size]; assert(0 <= pow && pow < 4); const char **regs = kRegSizeTable[pow]; - const char *a = kRegATable[pow]; - MOV(regs[ir->opr1->phys], a); + const char *a = regs[GET_AREG_INDEX()]; + if (ir->opr1->phys != GET_AREG_INDEX()) + MOV(regs[ir->opr1->phys], a); if (!(ir->dst->vtype->flag & VRTF_UNSIGNED)) { switch (pow) { case 1: CWTL(); break; @@ -287,33 +308,42 @@ static void ir_out(IR *ir) { } DIV(regs[ir->opr2->phys]); } - MOV(a, regs[ir->dst->phys]); + if (ir->dst->phys != GET_AREG_INDEX()) + MOV(a, regs[ir->dst->phys]); } break; case IR_MOD: - // Break %rdx assert(!(ir->opr1->flag & VRF_CONST) && !(ir->opr2->flag & VRF_CONST)); if (ir->dst->vtype->size == 1) { + assert(ir->dst->phys == ir->opr1->phys); + assert(ir->opr2->phys != GET_AREG_INDEX()); + // Break %ax if (!(ir->dst->vtype->flag & VRTF_UNSIGNED)) { - MOVSX(kReg8s[ir->opr1->phys], AX); + if (ir->opr1->phys != GET_AREG_INDEX()) + MOVSX(kReg8s[ir->opr1->phys], AX); IDIV(kReg8s[ir->opr2->phys]); } else { - MOVZX(kReg8s[ir->opr1->phys], AX); + if (ir->opr1->phys != GET_AREG_INDEX()) + MOVZX(kReg8s[ir->opr1->phys], AX); DIV(kReg8s[ir->opr2->phys]); } // Cannot `mov` directly from %ah to %r8b // MOV(AH, kReg8s[ir->dst->phys]); MOV(AH, AL); - MOV(AL, kReg8s[ir->dst->phys]); + if (ir->dst->phys != GET_AREG_INDEX()) + MOV(AL, kReg8s[ir->dst->phys]); } else { + assert(ir->dst->phys == ir->opr1->phys); + assert(ir->opr2->phys != GET_AREG_INDEX()); + // Break %rax, %rdx assert(0 <= ir->dst->vtype->size && ir->dst->vtype->size < kPow2TableSize); int pow = kPow2Table[ir->dst->vtype->size]; assert(0 <= pow && pow < 4); const char **regs = kRegSizeTable[pow]; - const char *a = kRegATable[pow]; - const char *d = kRegDTable[pow]; - MOV(regs[ir->opr1->phys], a); + const char *a = regs[GET_AREG_INDEX()]; + if (ir->opr1->phys != GET_AREG_INDEX()) + 
MOV(regs[ir->opr1->phys], a); if (!(ir->dst->vtype->flag & VRTF_UNSIGNED)) { switch (pow) { case 1: CWTL(); break; @@ -331,7 +361,9 @@ static void ir_out(IR *ir) { } DIV(regs[ir->opr2->phys]); } - MOV(d, regs[ir->dst->phys]); + const int dreg = GET_DREG_INDEX(); + if (ir->dst->phys != dreg) + MOV(regs[dreg], regs[ir->dst->phys]); } break; @@ -392,25 +424,11 @@ static void ir_out(IR *ir) { if (ir->opr2->flag & VRF_CONST) { SHL(IM(ir->opr2->fixnum), dst); } else { - // TODO: handle register mapping in regalloc. const int creg = GET_CREG_INDEX(); - if (ir->opr2->phys == creg) { - SHL(CL, dst); - } else if (ir->dst->phys == creg) { - assert(ir->opr2->phys != creg); - const char *rega = kRegATable[pow]; - PUSH(RAX); - MOV(dst, rega); - MOV(kReg8s[ir->opr2->phys], CL); - SHL(CL, rega); - MOV(rega, dst); - POP(RAX); - } else { - PUSH(RCX); - MOV(kReg8s[ir->opr2->phys], CL); - SHL(CL, dst); - POP(RCX); - } + assert(ir->opr2->phys != creg); + assert(ir->dst->phys != creg); + MOV(kReg8s[ir->opr2->phys], CL); + SHL(CL, dst); } } break; @@ -427,25 +445,11 @@ static void ir_out(IR *ir) { if (ir->opr2->flag & VRF_CONST) { RSHIFT_INST(IM(ir->opr2->fixnum), dst); } else { - // TODO: handle register mapping in regalloc. 
const int creg = GET_CREG_INDEX(); - if (ir->opr2->phys == creg) { - RSHIFT_INST(CL, dst); - } else if (ir->dst->phys == creg) { - assert(ir->opr2->phys != creg); - const char *rega = kRegATable[pow]; - PUSH(RAX); - MOV(dst, rega); - MOV(kReg8s[ir->opr2->phys], CL); - RSHIFT_INST(CL, rega); - MOV(rega, dst); - POP(RAX); - } else { - PUSH(RCX); - MOV(kReg8s[ir->opr2->phys], CL); - RSHIFT_INST(CL, dst); - POP(RCX); - } + assert(ir->opr2->phys != creg); + assert(ir->dst->phys != creg); + MOV(kReg8s[ir->opr2->phys], CL); + RSHIFT_INST(CL, dst); } #undef RSHIFT_INST } @@ -564,7 +568,9 @@ static void ir_out(IR *ir) { case IR_TJMP: { + // Break %rax int phys = ir->opr1->phys; + assert(phys != GET_AREG_INDEX()); const int powd = 3; assert(0 <= ir->opr1->vtype->size && ir->opr1->vtype->size < kPow2TableSize); int pows = kPow2Table[ir->opr1->vtype->size]; @@ -690,7 +696,8 @@ static void ir_out(IR *ir) { int pow = kPow2Table[ir->dst->vtype->size]; assert(0 <= pow && pow < 4); const char **regs = kRegSizeTable[pow]; - MOV(kRegATable[pow], regs[ir->dst->phys]); + if (ir->dst->phys != GET_AREG_INDEX()) + MOV(regs[GET_AREG_INDEX()], regs[ir->dst->phys]); } } } @@ -703,17 +710,15 @@ static void ir_out(IR *ir) { case SZ_DOUBLE: MOVSD(kFReg64s[ir->opr1->phys], XMM0); break; default: assert(false); break; } - break; - } - { + } else { assert(0 <= ir->opr1->vtype->size && ir->opr1->vtype->size < kPow2TableSize); int pow = kPow2Table[ir->opr1->vtype->size]; assert(0 <= pow && pow < 4); const char **regs = kRegSizeTable[pow]; if (ir->opr1->flag & VRF_CONST) - MOV(IM(ir->opr1->fixnum), kRegATable[pow]); - else - MOV(regs[ir->opr1->phys], kRegATable[pow]); + MOV(IM(ir->opr1->fixnum), regs[GET_AREG_INDEX()]); + else if (ir->opr1->phys != GET_AREG_INDEX()) + MOV(regs[ir->opr1->phys], regs[GET_AREG_INDEX()]); } break; @@ -771,7 +776,6 @@ static void ir_out(IR *ir) { } else { // x64 support signed 64bit-signed-int to double only, so pass half value // (precision is lost anyway). 
- // Break %rax const Name *neglabel = alloc_label(); const Name *skiplabel = alloc_label(); TEST(s, s); @@ -783,6 +787,7 @@ static void ir_out(IR *ir) { } JMP(fmt_name(skiplabel)); EMIT_LABEL(fmt_name(neglabel)); + PUSH(RAX); // Push %rax to avoid Break MOV(s, RAX); SHR(IM(1), RAX); switch (ir->dst->vtype->size) { @@ -790,6 +795,7 @@ static void ir_out(IR *ir) { case SZ_DOUBLE: CVTSI2SD(RAX, d); ADDSD(d, d); break; default: assert(false); break; } + POP(RAX); // Pop %rax EMIT_LABEL(fmt_name(skiplabel)); } } @@ -871,7 +877,8 @@ static void ir_out(IR *ir) { int pow = kPow2Table[ir->dst->vtype->size]; assert(0 <= pow && pow < 4); const char **regs = kRegSizeTable[pow]; - MOV(kRegATable[pow], regs[ir->dst->phys]); + if (ir->dst->phys != GET_AREG_INDEX()) + MOV(regs[GET_AREG_INDEX()], regs[ir->dst->phys]); } break; diff --git a/src/cc/backend/codegen.c b/src/cc/backend/codegen.c index 6ccbc5640..575115ba7 100644 --- a/src/cc/backend/codegen.c +++ b/src/cc/backend/codegen.c @@ -848,8 +848,11 @@ static void gen_defun(Function *func) { extern const int ArchRegParamMapping[]; fnbe->ra = curra = new_reg_alloc(ArchRegParamMapping, PHYSICAL_REG_MAX, PHYSICAL_REG_TEMPORARY); #ifndef __NO_FLONUM - fnbe->ra->fphys_max = PHYSICAL_FREG_MAX; - fnbe->ra->fphys_temporary_count = PHYSICAL_FREG_TEMPORARY; + curra->fphys_max = PHYSICAL_FREG_MAX; + curra->fphys_temporary_count = PHYSICAL_FREG_TEMPORARY; +#endif +#if defined(__x86_64__) + curra->detect_extra_occupied = detect_extra_occupied; #endif // Allocate BBs for goto labels. 
diff --git a/src/cc/backend/codegen_expr.c b/src/cc/backend/codegen_expr.c index ed6e76e7c..342abe923 100644 --- a/src/cc/backend/codegen_expr.c +++ b/src/cc/backend/codegen_expr.c @@ -362,20 +362,24 @@ static Expr *simplify_funarg(Expr *arg) { // Binary operators case EX_MUL: case EX_DIV: + case EX_MOD: + case EX_LSHIFT: + case EX_RSHIFT: #if defined(__x86_64__) - // On x64, MUL and DIV instruction implicitly uses (breaks) %rdx + // On x64, MUL, DIV and MOD instructions implicitly use (break) %rdx // and %rdx is used as 3rd argument. + // Similarly, shift instructions (SHL, SHR) use %cl, which is the 4th argument, // so must be precalculated. return gen_expr_as_tmpvar(arg); +#else + // On targets other than x64, these opcodes can be used directly in function arguments. + // Fallthrough #endif case EX_ADD: case EX_SUB: - case EX_MOD: case EX_BITAND: case EX_BITOR: case EX_BITXOR: - case EX_LSHIFT: - case EX_RSHIFT: case EX_EQ: case EX_NE: case EX_LT: diff --git a/src/cc/backend/ir.h b/src/cc/backend/ir.h index f3d52f8c6..483e2cb4f 100644 --- a/src/cc/backend/ir.h +++ b/src/cc/backend/ir.h @@ -229,3 +229,4 @@ typedef struct FuncBackend { // void tweak_irs(FuncBackend *fnbe); +unsigned long detect_extra_occupied(IR* ir); diff --git a/src/cc/backend/regalloc.c b/src/cc/backend/regalloc.c index a218fbbec..7926bdfd4 100644 --- a/src/cc/backend/regalloc.c +++ b/src/cc/backend/regalloc.c @@ -17,6 +17,7 @@ RegAlloc *new_reg_alloc(const int *reg_param_mapping, int phys_max, int temporar ra->vregs = new_vector(); ra->intervals = NULL; ra->sorted_intervals = NULL; + ra->detect_extra_occupied = NULL; ra->reg_param_mapping = reg_param_mapping; ra->phys_max = phys_max; ra->phys_temporary_count = temporary_count; @@ -94,14 +95,12 @@ typedef struct { int phys_max; int phys_temporary; int active_count; - int active_tmp_count; unsigned long using_bits; unsigned long used_bits; } PhysicalRegisterSet; static void expire_old_intervals(PhysicalRegisterSet *p, int start) { int active_count = p->active_count; - int 
active_tmp_count = p->active_tmp_count; unsigned long using_bits = p->using_bits; int j; for (j = 0; j < active_count; ++j) { @@ -110,12 +109,9 @@ static void expire_old_intervals(PhysicalRegisterSet *p, int start) { break; int phys = li->phys; using_bits &= ~(1UL << phys); - if (phys < p->phys_temporary) - --active_tmp_count; } remove_active(p->active, active_count, 0, j); p->active_count = active_count - j; - p->active_tmp_count = active_tmp_count; p->using_bits = using_bits; } @@ -138,8 +134,8 @@ static void set_inout_interval(Vector *vregs, LiveInterval *intervals, int nip) static void check_live_interval(BBContainer *bbcon, int vreg_count, LiveInterval *intervals) { for (int i = 0; i < vreg_count; ++i) { LiveInterval *li = &intervals[i]; + li->occupied_reg_bit = 0; li->state = LI_NORMAL; - li->flag = 0; li->start = li->end = -1; li->virt = i; li->phys = -1; @@ -170,43 +166,75 @@ static void check_live_interval(BBContainer *bbcon, int vreg_count, LiveInterval } } -static void detect_live_interval_flags(BBContainer *bbcon, int vreg_count, +void occupy_regs(RegAlloc *ra, Vector *actives, unsigned long ioccupy, unsigned long foccupy) { + for (int k = 0; k < actives->len; ++k) { + LiveInterval *li = actives->data[k]; + VReg *vreg = ra->vregs->data[li->virt]; + assert(vreg != NULL); + li->occupied_reg_bit |= (vreg->vtype->flag & VRTF_FLONUM) ? foccupy : ioccupy; + } +} + +static void detect_live_interval_flags(RegAlloc *ra, BBContainer *bbcon, int vreg_count, LiveInterval **sorted_intervals) { Vector *inactives = new_vector(); - for (int i = 0; i < vreg_count; ++i) - vec_push(inactives, sorted_intervals[i]); Vector *actives = new_vector(); + for (int i = 0; i < vreg_count; ++i) { + LiveInterval *li = sorted_intervals[i]; + vec_push(li->start < 0 ? 
actives : inactives, li); + } int nip = 0; - bool calling = false; + unsigned long iargset = 0, fargset = 0; for (int i = 0; i < bbcon->bbs->len; ++i) { BB *bb = bbcon->bbs->data[i]; for (int j = 0; j < bb->irs->len; ++j, ++nip) { - while (inactives->len > 0) { - LiveInterval *li = inactives->data[0]; - if (li->start > nip) - break; - vec_remove_at(inactives, 0); - vec_push(actives, li); + IR *ir = bb->irs->data[j]; + if (ra->detect_extra_occupied != NULL) { + unsigned long ioccupy = (*ra->detect_extra_occupied)(ir); + if (ioccupy != 0) + occupy_regs(ra, actives, ioccupy, 0); + } + + if (ir->kind == IR_PUSHARG) { + VReg *opr1 = ir->opr1; + if (opr1->vtype->flag & VRTF_FLONUM) { + int n = ir->pusharg.index; + // Assume FP argument registers are in argument order (no mapping). + fargset |= 1UL << n; + } else { + int n = ra->reg_param_mapping[ir->pusharg.index]; + if (n >= 0) + iargset |= 1UL << n; + } } + if (iargset != 0 || fargset != 0) + occupy_regs(ra, actives, iargset, fargset); + + // Deactivate intervals that end at this ip. for (int k = 0; k < actives->len; ++k) { LiveInterval *li = actives->data[k]; - if (li->end < nip) + if (li->end <= nip) vec_remove_at(actives, k--); } - IR *ir = bb->irs->data[j]; - if (ir->kind == IR_PRECALL) - calling = true; - if (calling) { - for (int k = 0; k < actives->len; ++k) { - LiveInterval *li = actives->data[k]; - if (li->start < nip) - li->flag |= LIF_CONTAINS_CALL; - } + // A call clobbers the temporary registers of every interval that is live across it (start < nip < end). + if (ir->kind == IR_CALL) { + // Registers not preserved across calls by the calling convention. + const unsigned long ibroken = (1UL << ra->phys_temporary_count) - 1; + const unsigned long fbroken = (1UL << ra->fphys_temporary_count) - 1; + occupy_regs(ra, actives, ibroken, fbroken); + iargset = fargset = 0; + } + + // Activate intervals only after the occupancy checks above. 
+ while (inactives->len > 0) { + LiveInterval *li = inactives->data[0]; + if (li->start > nip) + break; + vec_remove_at(inactives, 0); + vec_push(actives, li); } - if (ir->kind == IR_CALL) - calling = false; } } @@ -221,7 +249,6 @@ static void linear_scan_register_allocation(RegAlloc *ra, LiveInterval **sorted_ .phys_max = ra->phys_max, .phys_temporary = ra->phys_temporary_count, .active_count = 0, - .active_tmp_count = 0, .using_bits = 0, .used_bits = 0, }; @@ -230,7 +257,6 @@ static void linear_scan_register_allocation(RegAlloc *ra, LiveInterval **sorted_ .phys_max = ra->fphys_max, .phys_temporary = ra->fphys_temporary_count, .active_count = 0, - .active_tmp_count = 0, .using_bits = 0, .used_bits = 0, }; @@ -242,60 +268,45 @@ static void linear_scan_register_allocation(RegAlloc *ra, LiveInterval **sorted_ if (li->state != LI_NORMAL) continue; expire_old_intervals(&iregset, li->start); - PhysicalRegisterSet *prsp = &iregset; expire_old_intervals(&fregset, li->start); + + PhysicalRegisterSet *prsp = &iregset; if (((VReg*)ra->vregs->data[li->virt])->vtype->flag & VRTF_FLONUM) prsp = &fregset; int start_index = 0; - int active_count = prsp->active_count; - if (li->flag & LIF_CONTAINS_CALL) { - start_index = prsp->phys_temporary; - active_count = (active_count - prsp->active_tmp_count) + prsp->phys_temporary; - } - - if (active_count >= prsp->phys_max) { - split_at_interval(ra, prsp->active, prsp->active_count, li); - } else { - int regno = -1; - VReg *vreg = ra->vregs->data[li->virt]; - int ip = vreg->reg_param_index; - if (ip >= 0) { - if (!(li->flag & LIF_CONTAINS_CALL)) { - // If the live interval doesn't contain `CALL` instruction, - // prefer to use the parameter register passed to the function. - if (vreg->vtype->flag & VRTF_FLONUM) { - // Assume floating-pointer parameter registers are same order, - // and no mapping required. 
- } else { - ip = ra->reg_param_mapping[ip]; - if (ip < 0) { - // The parameter register is not mapped => cannot hold the value in the given register, - // assign to non-temporary register to be on the safe side. - start_index = prsp->phys_temporary; - active_count = (active_count - prsp->active_tmp_count) + prsp->phys_temporary; - } - } - - if (ip >= 0 && !(prsp->using_bits & (1UL << ip))) - regno = ip; - } + int regno = -1; + VReg *vreg = ra->vregs->data[li->virt]; + int ip = vreg->reg_param_index; + unsigned long occupied = prsp->using_bits | li->occupied_reg_bit; + if (ip >= 0) { + if (vreg->vtype->flag & VRTF_FLONUM) { + // Assume floating-point parameter registers are in the same order, + // so no mapping is required. + } else { + ip = ra->reg_param_mapping[ip]; } - if (regno < 0) { - for (int j = start_index; j < prsp->phys_max; ++j) { - if (!(prsp->using_bits & (1UL << j))) { - regno = j; - break; - } + + if (ip >= 0 && !(occupied & (1UL << ip))) + regno = ip; + else + start_index = prsp->phys_temporary; + } + if (regno < 0) { + for (int j = start_index; j < prsp->phys_max; ++j) { + if (!(occupied & (1UL << j))) { + regno = j; + break; } - assert(regno >= 0); } + } + if (regno >= 0) { li->phys = regno; prsp->using_bits |= 1UL << regno; insert_active(prsp->active, prsp->active_count, li); ++prsp->active_count; - if (regno < prsp->phys_temporary) - ++prsp->active_tmp_count; + } else { + split_at_interval(ra, prsp->active, prsp->active_count, li); } prsp->used_bits |= prsp->using_bits; } @@ -432,7 +443,7 @@ void alloc_physical_registers(RegAlloc *ra, BBContainer *bbcon) { qsort(sorted_intervals, vreg_count, sizeof(LiveInterval*), sort_live_interval); ra->sorted_intervals = sorted_intervals; - detect_live_interval_flags(bbcon, vreg_count, sorted_intervals); + detect_live_interval_flags(ra, bbcon, vreg_count, sorted_intervals); linear_scan_register_allocation(ra, sorted_intervals, vreg_count); // Spill vregs. 
diff --git a/src/cc/backend/regalloc.h b/src/cc/backend/regalloc.h index 23a85d11a..a64e97004 100644 --- a/src/cc/backend/regalloc.h +++ b/src/cc/backend/regalloc.h @@ -7,6 +7,7 @@ typedef struct BBContainer BBContainer; typedef struct Function Function; +typedef struct IR IR; typedef struct VReg VReg; typedef struct VRegType VRegType; typedef struct Vector Vector; @@ -17,11 +18,9 @@ enum LiveIntervalState { LI_CONST, }; -#define LIF_CONTAINS_CALL (1 << 0) - typedef struct LiveInterval { + unsigned long occupied_reg_bit; // Represent occupied registers in bit. enum LiveIntervalState state; - int flag; int start; int end; int virt; // Virtual register no. @@ -32,6 +31,7 @@ typedef struct RegAlloc { Vector *vregs; // LiveInterval *intervals; // size=vregs->len LiveInterval **sorted_intervals; + unsigned long (*detect_extra_occupied)(IR *ir); const int *reg_param_mapping; int phys_max; // Max physical register count. @@ -45,3 +45,4 @@ typedef struct RegAlloc { RegAlloc *new_reg_alloc(const int *reg_param_mapping, int phys_max, int temporary_count); VReg *reg_alloc_spawn(RegAlloc *ra, const VRegType *vtype, int flag); void alloc_physical_registers(RegAlloc *ra, BBContainer *bbcon); +void occupy_regs(RegAlloc *ra, Vector *actives, unsigned long ioccupy, unsigned long foccupy); diff --git a/src/config.h b/src/config.h index 20267e7dd..f7b9b27ba 100644 --- a/src/config.h +++ b/src/config.h @@ -22,7 +22,7 @@ #endif #if defined(__x86_64__) -#define PHYSICAL_REG_TEMPORARY (5) +#define PHYSICAL_REG_TEMPORARY (7) #define PHYSICAL_REG_MAX (PHYSICAL_REG_TEMPORARY + 7) #define PHYSICAL_FREG_TEMPORARY (8) #define PHYSICAL_FREG_MAX (PHYSICAL_FREG_TEMPORARY + 8)