Message ID | f1a7d2617c26c1745fafe13bbe3bc3b042c14217.1542321076.git.alistair.francis@wdc.com |
---|---|
State | New |
Headers | show |
Series | Add RISC-V TCG backend support | expand |
On 11/15/18 11:36 PM, Alistair Francis wrote: > +static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, > + TCGReg arg2, TCGLabel *l) > +{ > + RISCVInsn op = tcg_brcond_to_riscv[cond].op; > + bool swap = tcg_brcond_to_riscv[cond].swap; > + > + tcg_out_opc_branch(s, op, swap ? arg2 : arg1, swap ? arg1 : arg2, 0); You might want to tcg_debug_assert(op != 0) here. > + if (l->has_value) { > + reloc_sbimm12(s->code_ptr - 1, l->u.value_ptr); I'm concerned about the conditional branch range. +-4K isn't much to work with. The minimum we have for other hosts is +-32K. We have two options: (1) greatly reduce the max size of the TB for this host; (2) be prepared to emit a 2 insn sequence: conditional branch across unconditional branch, with forward branches that turn out to be small patched with a nop. FWIW, the first case would be done via modification of tcg_op_buf_full. You might have to go as low as 500 opcodes, I'm not sure. > +static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target) > +{ > + ptrdiff_t offset = tcg_pcrel_diff(s, target); > + tcg_debug_assert(offset == sextract64(offset, 0, 26)); > + tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, offset); > +} > + > +static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target) > +{ > + ptrdiff_t offset = tcg_pcrel_diff(s, target); > + > + if (offset == sextract64(offset, 0, 26)) { > + tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, offset); > + } else { > + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target); > + tcg_out_opc_jump(s, OPC_JAL, TCG_REG_TMP0, 0); > + } > +} How are these to be used? I guess I'll find out... > +static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail) > +{ > + TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA; > + ptrdiff_t offset = tcg_pcrel_diff(s, arg); > + if (offset == sextract32(offset, 1, 20) << 1) { sextract64. 
> + /* short jump: -2097150 to 2097152 */ > + tcg_out_opc_jump(s, OPC_JAL, link, offset); > + } else if (TCG_TARGET_REG_BITS == 32 || > + offset == sextract32(offset, 1, 31) << 1) { sextract64. > + /* long jump: -2147483646 to 2147483648 */ > + tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP0, 0); > + tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, 0); > + reloc_call(s->code_ptr - 2, arg); > + } else if (TCG_TARGET_REG_BITS == 64) { > + /* far jump: 64-bit */ > + tcg_target_long imm = sextract32((tcg_target_long)arg, 0, 12); > + tcg_target_long base = (tcg_target_long)arg - imm; > + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, base); > + tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, imm); r~
On Fri, Nov 16, 2018 at 1:14 AM Richard Henderson <richard.henderson@linaro.org> wrote: > > On 11/15/18 11:36 PM, Alistair Francis wrote: > > +static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, > > + TCGReg arg2, TCGLabel *l) > > +{ > > + RISCVInsn op = tcg_brcond_to_riscv[cond].op; > > + bool swap = tcg_brcond_to_riscv[cond].swap; > > + > > + tcg_out_opc_branch(s, op, swap ? arg2 : arg1, swap ? arg1 : arg2, 0); > > You might want to tcg_debug_assert(op != 0) here. > > > + if (l->has_value) { > > + reloc_sbimm12(s->code_ptr - 1, l->u.value_ptr); > > I'm concerned about the conditional branch range. +-4K isn't much to work > with. The minimum we have for other hosts is +-32K. > > We have two options: (1) greatly reduce the max size of the TB for this host; > (2) be prepared to emit a 2 insn sequence: conditional branch across > unconditional branch, with forward branches that turn out to be small patched > with a nop. > > FWIW, the first case would be done via modification of tcg_op_buf_full. You > might have to go as low as 500 opcodes, I'm not sure. How do we do option 2? > > > +static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target) > > +{ > > + ptrdiff_t offset = tcg_pcrel_diff(s, target); > > + tcg_debug_assert(offset == sextract64(offset, 0, 26)); > > + tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, offset); > > +} > > + > > +static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target) > > +{ > > + ptrdiff_t offset = tcg_pcrel_diff(s, target); > > + > > + if (offset == sextract64(offset, 0, 26)) { > > + tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, offset); > > + } else { > > + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target); > > + tcg_out_opc_jump(s, OPC_JAL, TCG_REG_TMP0, 0); > > + } > > +} > > How are these to be used? I guess I'll find out... It's called on a exit TB op. I'm not sure if that's correct though. 
Alistair > > > +static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail) > > +{ > > + TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA; > > + ptrdiff_t offset = tcg_pcrel_diff(s, arg); > > + if (offset == sextract32(offset, 1, 20) << 1) { > > sextract64. > > > + /* short jump: -2097150 to 2097152 */ > > + tcg_out_opc_jump(s, OPC_JAL, link, offset); > > + } else if (TCG_TARGET_REG_BITS == 32 || > > + offset == sextract32(offset, 1, 31) << 1) { > > sextract64. > > > + /* long jump: -2147483646 to 2147483648 */ > > + tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP0, 0); > > + tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, 0); > > + reloc_call(s->code_ptr - 2, arg); > > + } else if (TCG_TARGET_REG_BITS == 64) { > > + /* far jump: 64-bit */ > > + tcg_target_long imm = sextract32((tcg_target_long)arg, 0, 12); > > + tcg_target_long base = (tcg_target_long)arg - imm; > > + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, base); > > + tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, imm); > > > r~
On 11/21/18 12:49 AM, Alistair Francis wrote: > On Fri, Nov 16, 2018 at 1:14 AM Richard Henderson > <richard.henderson@linaro.org> wrote: >> >> On 11/15/18 11:36 PM, Alistair Francis wrote: >>> +static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, >>> + TCGReg arg2, TCGLabel *l) >>> +{ >>> + RISCVInsn op = tcg_brcond_to_riscv[cond].op; >>> + bool swap = tcg_brcond_to_riscv[cond].swap; >>> + >>> + tcg_out_opc_branch(s, op, swap ? arg2 : arg1, swap ? arg1 : arg2, 0); >> >> You might want to tcg_debug_assert(op != 0) here. >> >>> + if (l->has_value) { >>> + reloc_sbimm12(s->code_ptr - 1, l->u.value_ptr); >> >> I'm concerned about the conditional branch range. +-4K isn't much to work >> with. The minimum we have for other hosts is +-32K. >> >> We have two options: (1) greatly reduce the max size of the TB for this host; >> (2) be prepared to emit a 2 insn sequence: conditional branch across >> unconditional branch, with forward branches that turn out to be small patched >> with a nop. >> >> FWIW, the first case would be done via modification of tcg_op_buf_full. You >> might have to go as low as 500 opcodes, I'm not sure. > > How do we do option 2? If l->has_value, just check the actual range. But of course backward branching isn't that common in tcg generated code. Most branches within the TB are short forward branches, but we also don't know how short is short. But every TB begins with a test of env->exit_code and a conditional branch to the end of the block, where we place some code to return to the main loop and return the pointer to the TB at which we exited. Thus every TB has a branch that spans the size of the entire TB. So, invent (or repurpose) an R_RISCV_FOO value. It doesn't matter which because it's private within tcg/riscv/. Just add some commentary. (See e.g. tcg/sparc/ and its use of R_SPARC_13.) While generating code, emit the conditional branch as normal; leave the unknown destination 0 for now. Emit a nop as the second insn. 
When resolving R_RISCV_FOO, if the conditional branch is in range, great! Just patch it. If it is out of range, then you need to edit the conditional branch to reverse the condition (insn ^ (1 << 12)) and branch to pc+8, i.e. over the next instruction — which was a nop during generation, but into which you will now install jal r0,dest going to the real destination. r~
On Tue, Nov 20, 2018 at 11:40 PM Richard Henderson <richard.henderson@linaro.org> wrote: > > On 11/21/18 12:49 AM, Alistair Francis wrote: > > On Fri, Nov 16, 2018 at 1:14 AM Richard Henderson > > <richard.henderson@linaro.org> wrote: > >> > >> On 11/15/18 11:36 PM, Alistair Francis wrote: > >>> +static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, > >>> + TCGReg arg2, TCGLabel *l) > >>> +{ > >>> + RISCVInsn op = tcg_brcond_to_riscv[cond].op; > >>> + bool swap = tcg_brcond_to_riscv[cond].swap; > >>> + > >>> + tcg_out_opc_branch(s, op, swap ? arg2 : arg1, swap ? arg1 : arg2, 0); > >> > >> You might want to tcg_debug_assert(op != 0) here. > >> > >>> + if (l->has_value) { > >>> + reloc_sbimm12(s->code_ptr - 1, l->u.value_ptr); > >> > >> I'm concerned about the conditional branch range. +-4K isn't much to work > >> with. The minimum we have for other hosts is +-32K. > >> > >> We have two options: (1) greatly reduce the max size of the TB for this host; > >> (2) be prepared to emit a 2 insn sequence: conditional branch across > >> unconditional branch, with forward branches that turn out to be small patched > >> with a nop. > >> > >> FWIW, the first case would be done via modification of tcg_op_buf_full. You > >> might have to go as low as 500 opcodes, I'm not sure. > > > > How do we do option 2? > > If l->has_value, just check the actual range. But of course backward branching > isn't that common in tcg generated code. Most branches within the TB are short > forward branches, but we also don't know how short is short. > > But every TB begins with a test of env->exit_code and a conditional branch to > the end of the block, where we place some code to return to the main loop and > return the pointer to the TB at which we exited. Thus every TB has a branch > that spans the size of the entire TB. > > So, invent (or repurpose) an R_RISCV_FOO value. It doesn't matter which > because it's private within tcg/riscv/. Just add some commentary. (See e.g. 
> tcg/sparc/ and its use of R_SPARC_13.) > > While generating code, emit the conditional branch as normal; leave the unknown > destination 0 for now. Emit a nop as the second insn. > > When resolving R_RISCV_FOO, if the conditional branch is in range, great! Just > patch it. If it is out of range, then you need to edit the conditional branch > to reverse the condition (insn ^ (1 << 12)) and branch to pc+8, i.e. over the > next instruction. Which was a nop during generation, but you will now install > jal r0,dest going to the real destination. Ok, I think I have done this correctly. I have something that compiles and runs. I'll send a second RFC out sometime this week. It won't be based on your latest patches and it's still missing some things but I want to keep this moving along. Thanks so much for your help Richard! Alistair > > > r~
diff --git a/tcg/riscv/tcg-target.inc.c b/tcg/riscv/tcg-target.inc.c index bc433170c4..b449e17295 100644 --- a/tcg/riscv/tcg-target.inc.c +++ b/tcg/riscv/tcg-target.inc.c @@ -574,6 +574,150 @@ static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val, return false; } +static const struct { + RISCVInsn op; + bool swap; +} tcg_brcond_to_riscv[] = { + [TCG_COND_EQ] = { OPC_BEQ, false }, + [TCG_COND_NE] = { OPC_BNE, false }, + [TCG_COND_LT] = { OPC_BLT, false }, + [TCG_COND_GE] = { OPC_BGE, false }, + [TCG_COND_LE] = { OPC_BGE, true }, + [TCG_COND_GT] = { OPC_BLT, true }, + [TCG_COND_LTU] = { OPC_BLTU, false }, + [TCG_COND_GEU] = { OPC_BGEU, false }, + [TCG_COND_LEU] = { OPC_BGEU, true }, + [TCG_COND_GTU] = { OPC_BLTU, true } +}; + +static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1, + TCGReg arg2, TCGLabel *l) +{ + RISCVInsn op = tcg_brcond_to_riscv[cond].op; + bool swap = tcg_brcond_to_riscv[cond].swap; + + tcg_out_opc_branch(s, op, swap ? arg2 : arg1, swap ? arg1 : arg2, 0); + + if (l->has_value) { + reloc_sbimm12(s->code_ptr - 1, l->u.value_ptr); + } else { + tcg_out_reloc(s, s->code_ptr - 1, R_RISCV_BRANCH, l, 0); + } +} + +static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg arg1, TCGReg arg2) +{ + switch (cond) { + case TCG_COND_EQ: + tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2); + tcg_out_opc_imm(s, OPC_SLTIU, ret, ret, 1); + break; + case TCG_COND_NE: + tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2); + tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, ret); + break; + case TCG_COND_LT: + tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2); + break; + case TCG_COND_GE: + tcg_out_opc_reg(s, OPC_SLT, ret, arg1, arg2); + tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); + break; + case TCG_COND_LE: + tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1); + tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); + break; + case TCG_COND_GT: + tcg_out_opc_reg(s, OPC_SLT, ret, arg2, arg1); + break; + case TCG_COND_LTU: + tcg_out_opc_reg(s, OPC_SLTU, ret, 
arg1, arg2); + break; + case TCG_COND_GEU: + tcg_out_opc_reg(s, OPC_SLTU, ret, arg1, arg2); + tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); + break; + case TCG_COND_LEU: + tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1); + tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1); + break; + case TCG_COND_GTU: + tcg_out_opc_reg(s, OPC_SLTU, ret, arg2, arg1); + break; + default: + g_assert_not_reached(); + break; + } +} + +static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah, + TCGReg bl, TCGReg bh, TCGLabel *l) +{ + /* todo */ + g_assert_not_reached(); +} + +static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret, + TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh) +{ + /* todo */ + g_assert_not_reached(); +} + +static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target) +{ + ptrdiff_t offset = tcg_pcrel_diff(s, target); + tcg_debug_assert(offset == sextract64(offset, 0, 26)); + tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, offset); +} + +static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target) +{ + ptrdiff_t offset = tcg_pcrel_diff(s, target); + + if (offset == sextract64(offset, 0, 26)) { + tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, offset); + } else { + tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target); + tcg_out_opc_jump(s, OPC_JAL, TCG_REG_TMP0, 0); + } +} + +static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail) +{ + TCGReg link = tail ? 
TCG_REG_ZERO : TCG_REG_RA; + ptrdiff_t offset = tcg_pcrel_diff(s, arg); + if (offset == sextract32(offset, 1, 20) << 1) { + /* short jump: -2097150 to 2097152 */ + tcg_out_opc_jump(s, OPC_JAL, link, offset); + } else if (TCG_TARGET_REG_BITS == 32 || + offset == sextract32(offset, 1, 31) << 1) { + /* long jump: -2147483646 to 2147483648 */ + tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP0, 0); + tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, 0); + reloc_call(s->code_ptr - 2, arg); + } else if (TCG_TARGET_REG_BITS == 64) { + /* far jump: 64-bit */ + tcg_target_long imm = sextract32((tcg_target_long)arg, 0, 12); + tcg_target_long base = (tcg_target_long)arg - imm; + tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, base); + tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, imm); + } else { + g_assert_not_reached(); + } +} + +static void tcg_out_tail(TCGContext *s, tcg_insn_unit *arg) +{ + tcg_out_call_int(s, arg, true); +} + +static void tcg_out_call(TCGContext *s, tcg_insn_unit *arg) +{ + tcg_out_call_int(s, arg, false); +} + void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr, uintptr_t addr) {