diff mbox series

[RFC,v1,15/23] riscv: tcg-target: Add branch and jump instructions

Message ID f1a7d2617c26c1745fafe13bbe3bc3b042c14217.1542321076.git.alistair.francis@wdc.com
State New
Headers show
Series Add RISC-V TCG backend support | expand

Commit Message

Alistair Francis Nov. 15, 2018, 10:36 p.m. UTC
Signed-off-by: Alistair Francis <alistair.francis@wdc.com>
Signed-off-by: Michael Clark <mjc@sifive.com>
---
 tcg/riscv/tcg-target.inc.c | 144 +++++++++++++++++++++++++++++++++++++
 1 file changed, 144 insertions(+)

Comments

Richard Henderson Nov. 16, 2018, 9:14 a.m. UTC | #1
On 11/15/18 11:36 PM, Alistair Francis wrote:
> +static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
> +                           TCGReg arg2, TCGLabel *l)
> +{
> +    RISCVInsn op = tcg_brcond_to_riscv[cond].op;
> +    bool swap = tcg_brcond_to_riscv[cond].swap;
> +
> +    tcg_out_opc_branch(s, op, swap ? arg2 : arg1, swap ? arg1 : arg2, 0);

You might want to tcg_debug_assert(op != 0) here.

> +    if (l->has_value) {
> +        reloc_sbimm12(s->code_ptr - 1, l->u.value_ptr);

I'm concerned about the conditional branch range.  +-4K isn't much to work
with.  The minimum we have for other hosts is +-32K.

We have two options: (1) greatly reduce the max size of the TB for this host;
(2) be prepared to emit a 2 insn sequence: conditional branch across
unconditional branch, with forward branches that turn out to be small patched
with a nop.

FWIW, the first case would be done via modification of tcg_op_buf_full.  You
might have to go as low as 500 opcodes, I'm not sure.

> +static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
> +{
> +    ptrdiff_t offset = tcg_pcrel_diff(s, target);
> +    tcg_debug_assert(offset == sextract64(offset, 0, 26));
> +    tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, offset);
> +}
> +
> +static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
> +{
> +    ptrdiff_t offset = tcg_pcrel_diff(s, target);
> +
> +    if (offset == sextract64(offset, 0, 26)) {
> +        tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, offset);
> +    } else {
> +        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
> +        tcg_out_opc_jump(s, OPC_JAL, TCG_REG_TMP0, 0);
> +    }
> +}

How are these to be used?  I guess I'll find out...

> +static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail)
> +{
> +    TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA;
> +    ptrdiff_t offset = tcg_pcrel_diff(s, arg);
> +    if (offset == sextract32(offset, 1, 20) << 1) {

sextract64.

> +        /* short jump: -2097150 to 2097152 */
> +        tcg_out_opc_jump(s, OPC_JAL, link, offset);
> +    } else if (TCG_TARGET_REG_BITS == 32 ||
> +        offset == sextract32(offset, 1, 31) << 1) {

sextract64.

> +        /* long jump: -2147483646 to 2147483648 */
> +        tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP0, 0);
> +        tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, 0);
> +        reloc_call(s->code_ptr - 2, arg);
> +    } else if (TCG_TARGET_REG_BITS == 64) {
> +        /* far jump: 64-bit */
> +        tcg_target_long imm = sextract32((tcg_target_long)arg, 0, 12);
> +        tcg_target_long base = (tcg_target_long)arg - imm;
> +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, base);
> +        tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, imm);


r~
Alistair Francis Nov. 20, 2018, 11:49 p.m. UTC | #2
On Fri, Nov 16, 2018 at 1:14 AM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 11/15/18 11:36 PM, Alistair Francis wrote:
> > +static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
> > +                           TCGReg arg2, TCGLabel *l)
> > +{
> > +    RISCVInsn op = tcg_brcond_to_riscv[cond].op;
> > +    bool swap = tcg_brcond_to_riscv[cond].swap;
> > +
> > +    tcg_out_opc_branch(s, op, swap ? arg2 : arg1, swap ? arg1 : arg2, 0);
>
> You might want to tcg_debug_assert(op != 0) here.
>
> > +    if (l->has_value) {
> > +        reloc_sbimm12(s->code_ptr - 1, l->u.value_ptr);
>
> I'm concerned about the conditional branch range.  +-4K isn't much to work
> with.  The minimum we have for other hosts is +-32K.
>
> We have two options: (1) greatly reduce the max size of the TB for this host;
> (2) be prepared to emit a 2 insn sequence: conditional branch across
> unconditional branch, with forward branches that turn out to be small patched
> with a nop.
>
> FWIW, the first case would be done via modification of tcg_op_buf_full.  You
> might have to go as low as 500 opcodes, I'm not sure.

How do we do option 2?

>
> > +static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
> > +{
> > +    ptrdiff_t offset = tcg_pcrel_diff(s, target);
> > +    tcg_debug_assert(offset == sextract64(offset, 0, 26));
> > +    tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, offset);
> > +}
> > +
> > +static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
> > +{
> > +    ptrdiff_t offset = tcg_pcrel_diff(s, target);
> > +
> > +    if (offset == sextract64(offset, 0, 26)) {
> > +        tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, offset);
> > +    } else {
> > +        tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_TMP0, (intptr_t)target);
> > +        tcg_out_opc_jump(s, OPC_JAL, TCG_REG_TMP0, 0);
> > +    }
> > +}
>
> How are these to be used?  I guess I'll find out...

It's called on an exit TB op. I'm not sure if that's correct, though.

Alistair

>
> > +static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail)
> > +{
> > +    TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA;
> > +    ptrdiff_t offset = tcg_pcrel_diff(s, arg);
> > +    if (offset == sextract32(offset, 1, 20) << 1) {
>
> sextract64.
>
> > +        /* short jump: -2097150 to 2097152 */
> > +        tcg_out_opc_jump(s, OPC_JAL, link, offset);
> > +    } else if (TCG_TARGET_REG_BITS == 32 ||
> > +        offset == sextract32(offset, 1, 31) << 1) {
>
> sextract64.
>
> > +        /* long jump: -2147483646 to 2147483648 */
> > +        tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP0, 0);
> > +        tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, 0);
> > +        reloc_call(s->code_ptr - 2, arg);
> > +    } else if (TCG_TARGET_REG_BITS == 64) {
> > +        /* far jump: 64-bit */
> > +        tcg_target_long imm = sextract32((tcg_target_long)arg, 0, 12);
> > +        tcg_target_long base = (tcg_target_long)arg - imm;
> > +        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, base);
> > +        tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, imm);
>
>
> r~
Richard Henderson Nov. 21, 2018, 7:40 a.m. UTC | #3
On 11/21/18 12:49 AM, Alistair Francis wrote:
> On Fri, Nov 16, 2018 at 1:14 AM Richard Henderson
> <richard.henderson@linaro.org> wrote:
>>
>> On 11/15/18 11:36 PM, Alistair Francis wrote:
>>> +static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
>>> +                           TCGReg arg2, TCGLabel *l)
>>> +{
>>> +    RISCVInsn op = tcg_brcond_to_riscv[cond].op;
>>> +    bool swap = tcg_brcond_to_riscv[cond].swap;
>>> +
>>> +    tcg_out_opc_branch(s, op, swap ? arg2 : arg1, swap ? arg1 : arg2, 0);
>>
>> You might want to tcg_debug_assert(op != 0) here.
>>
>>> +    if (l->has_value) {
>>> +        reloc_sbimm12(s->code_ptr - 1, l->u.value_ptr);
>>
>> I'm concerned about the conditional branch range.  +-4K isn't much to work
>> with.  The minimum we have for other hosts is +-32K.
>>
>> We have two options: (1) greatly reduce the max size of the TB for this host;
>> (2) be prepared to emit a 2 insn sequence: conditional branch across
>> unconditional branch, with forward branches that turn out to be small patched
>> with a nop.
>>
>> FWIW, the first case would be done via modification of tcg_op_buf_full.  You
>> might have to go as low as 500 opcodes, I'm not sure.
> 
> How do we do option 2?

If l->has_value, just check the actual range.  But of course backward branching
isn't that common in tcg generated code.  Most branches within the TB are short
forward branches, but we also don't know how short is short.

But every TB begins with a test of env->exit_code and a conditional branch to
the end of the block, where we place some code to return to the main loop and
return the pointer to the TB at which we exited.  Thus every TB has a branch
that spans the size of the entire TB.

So, invent (or repurpose) an R_RISCV_FOO value.  It doesn't matter which
because it's private within tcg/riscv/.  Just add some commentary.  (See e.g.
tcg/sparc/ and its use of R_SPARC_13.)

While generating code, emit the conditional branch as normal; leave the unknown
destination 0 for now.  Emit a nop as the second insn.

When resolving R_RISCV_FOO, if the conditional branch is in range, great!  Just
patch it.  If it is out of range, then you need to edit the conditional branch
to reverse the condition (insn ^ (1 << 12)) and branch to pc+8, i.e. over the
next instruction.  Which was a nop during generation, but you will now install
jal r0,dest going to the real destination.


r~
Alistair Francis Nov. 26, 2018, 10:58 p.m. UTC | #4
On Tue, Nov 20, 2018 at 11:40 PM Richard Henderson
<richard.henderson@linaro.org> wrote:
>
> On 11/21/18 12:49 AM, Alistair Francis wrote:
> > On Fri, Nov 16, 2018 at 1:14 AM Richard Henderson
> > <richard.henderson@linaro.org> wrote:
> >>
> >> On 11/15/18 11:36 PM, Alistair Francis wrote:
> >>> +static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
> >>> +                           TCGReg arg2, TCGLabel *l)
> >>> +{
> >>> +    RISCVInsn op = tcg_brcond_to_riscv[cond].op;
> >>> +    bool swap = tcg_brcond_to_riscv[cond].swap;
> >>> +
> >>> +    tcg_out_opc_branch(s, op, swap ? arg2 : arg1, swap ? arg1 : arg2, 0);
> >>
> >> You might want to tcg_debug_assert(op != 0) here.
> >>
> >>> +    if (l->has_value) {
> >>> +        reloc_sbimm12(s->code_ptr - 1, l->u.value_ptr);
> >>
> >> I'm concerned about the conditional branch range.  +-4K isn't much to work
> >> with.  The minimum we have for other hosts is +-32K.
> >>
> >> We have two options: (1) greatly reduce the max size of the TB for this host;
> >> (2) be prepared to emit a 2 insn sequence: conditional branch across
> >> unconditional branch, with forward branches that turn out to be small patched
> >> with a nop.
> >>
> >> FWIW, the first case would be done via modification of tcg_op_buf_full.  You
> >> might have to go as low as 500 opcodes, I'm not sure.
> >
> > How do we do option 2?
>
> If l->has_value, just check the actual range.  But of course backward branching
> isn't that common in tcg generated code.  Most branches within the TB are short
> forward branches, but we also don't know how short is short.
>
> But every TB begins with a test of env->exit_code and a conditional branch to
> the end of the block, where we place some code to return to the main loop and
> return the pointer to the TB at which we exited.  Thus every TB has a branch
> that spans the size of the entire TB.
>
> So, invent (or repurpose) an R_RISCV_FOO value.  It doesn't matter which
> because it's private within tcg/riscv/.  Just add some commentary.  (See e.g.
> tcg/sparc/ and its use of R_SPARC_13.)
>
> While generating code, emit the conditional branch as normal; leave the unknown
> destination 0 for now.  Emit a nop as the second insn.
>
> When resolving R_RISCV_FOO, if the conditional branch is in range, great!  Just
> patch it.  If it is out of range, then you need to edit the conditional branch
> to reverse the condition (insn ^ (1 << 12)) and branch to pc+8, i.e. over the
> next instruction.  Which was a nop during generation, but you will now install
> jal r0,dest going to the real destination.

Ok, I think I have done this correctly. I have something that compiles and runs.

I'll send a second RFC out sometime this week. It won't be based on
your latest patches and it's still missing some things but I want to
keep this moving along.

Thanks so much for your help Richard!

Alistair

>
>
> r~
diff mbox series

Patch

diff --git a/tcg/riscv/tcg-target.inc.c b/tcg/riscv/tcg-target.inc.c
index bc433170c4..b449e17295 100644
--- a/tcg/riscv/tcg-target.inc.c
+++ b/tcg/riscv/tcg-target.inc.c
@@ -574,6 +574,150 @@  static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
     return false;
 }
 
+/*
+ * Translation of a TCG comparison condition into a RISC-V conditional
+ * branch.  Only the six opcodes BEQ, BNE, BLT, BGE, BLTU and BGEU are used;
+ * the "less or equal" / "greater than" flavours reuse BGE(U) / BLT(U) with
+ * the two source operands exchanged, which is what .swap requests.
+ *
+ * Conditions that are not listed inside the array bounds are implicitly
+ * zero-initialised, i.e. they have .op == 0.
+ */
+static const struct {
+    RISCVInsn op;   /* branch opcode to emit */
+    bool swap;      /* true: emit with arg1 and arg2 exchanged */
+} tcg_brcond_to_riscv[] = {
+    [TCG_COND_EQ]  = { .op = OPC_BEQ,  .swap = false },
+    [TCG_COND_NE]  = { .op = OPC_BNE,  .swap = false },
+    [TCG_COND_LT]  = { .op = OPC_BLT,  .swap = false },
+    [TCG_COND_GE]  = { .op = OPC_BGE,  .swap = false },
+    [TCG_COND_LE]  = { .op = OPC_BGE,  .swap = true  },
+    [TCG_COND_GT]  = { .op = OPC_BLT,  .swap = true  },
+    [TCG_COND_LTU] = { .op = OPC_BLTU, .swap = false },
+    [TCG_COND_GEU] = { .op = OPC_BGEU, .swap = false },
+    [TCG_COND_LEU] = { .op = OPC_BGEU, .swap = true  },
+    [TCG_COND_GTU] = { .op = OPC_BLTU, .swap = true  }
+};
+
+/*
+ * Emit a conditional branch to label @l, taken when "arg1 <cond> arg2".
+ *
+ * @s:    code generation context
+ * @cond: TCG condition; must have an entry in tcg_brcond_to_riscv[]
+ * @arg1: first register operand
+ * @arg2: second register operand
+ * @l:    destination label
+ *
+ * The opcode and operand order are taken from tcg_brcond_to_riscv[].  The
+ * branch is first emitted with a zero displacement; it is then either
+ * patched at once (label already has a value) or recorded as an
+ * R_RISCV_BRANCH relocation against the label.
+ *
+ * NOTE(review): the displacement is patched with reloc_sbimm12(), so the
+ * reachable distance is presumably that of a single conditional branch.
+ * Nothing here copes with a label that is further away -- confirm that
+ * translation blocks cannot exceed that range, or add a long-branch form.
+ */
+static void tcg_out_brcond(TCGContext *s, TCGCond cond, TCGReg arg1,
+                           TCGReg arg2, TCGLabel *l)
+{
+    RISCVInsn op = tcg_brcond_to_riscv[cond].op;
+    bool swap = tcg_brcond_to_riscv[cond].swap;
+
+    /* A condition without a table entry yields op == 0: refuse to emit it. */
+    tcg_debug_assert(op != 0);
+
+    tcg_out_opc_branch(s, op, swap ? arg2 : arg1, swap ? arg1 : arg2, 0);
+
+    /* s->code_ptr - 1 is presumably the branch instruction just emitted. */
+    if (l->has_value) {
+        reloc_sbimm12(s->code_ptr - 1, l->u.value_ptr);
+    } else {
+        tcg_out_reloc(s, s->code_ptr - 1, R_RISCV_BRANCH, l, 0);
+    }
+}
+
+/*
+ * Emit code that sets @ret to the result of "arg1 <cond> arg2".
+ *
+ * Equality tests subtract the operands and compare the difference with
+ * zero.  Every ordering test is built from one SLT/SLTU, optionally with
+ * the operands exchanged, optionally followed by "XORI ret, ret, 1" to
+ * invert the result.  Any other condition aborts.
+ */
+static void tcg_out_setcond(TCGContext *s, TCGCond cond, TCGReg ret,
+                            TCGReg arg1, TCGReg arg2)
+{
+    RISCVInsn cmp;
+    bool swap = false;
+    bool invert = false;
+
+    switch (cond) {
+    case TCG_COND_EQ:
+        /* ret = (arg1 - arg2) <u 1 */
+        tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2);
+        tcg_out_opc_imm(s, OPC_SLTIU, ret, ret, 1);
+        return;
+    case TCG_COND_NE:
+        /* ret = 0 <u (arg1 - arg2) */
+        tcg_out_opc_reg(s, OPC_SUB, ret, arg1, arg2);
+        tcg_out_opc_reg(s, OPC_SLTU, ret, TCG_REG_ZERO, ret);
+        return;
+
+    case TCG_COND_LT:
+        cmp = OPC_SLT;
+        break;
+    case TCG_COND_GE:
+        cmp = OPC_SLT;
+        invert = true;
+        break;
+    case TCG_COND_LE:
+        cmp = OPC_SLT;
+        swap = true;
+        invert = true;
+        break;
+    case TCG_COND_GT:
+        cmp = OPC_SLT;
+        swap = true;
+        break;
+
+    case TCG_COND_LTU:
+        cmp = OPC_SLTU;
+        break;
+    case TCG_COND_GEU:
+        cmp = OPC_SLTU;
+        invert = true;
+        break;
+    case TCG_COND_LEU:
+        cmp = OPC_SLTU;
+        swap = true;
+        invert = true;
+        break;
+    case TCG_COND_GTU:
+        cmp = OPC_SLTU;
+        swap = true;
+        break;
+
+    default:
+        g_assert_not_reached();
+        return;
+    }
+
+    if (swap) {
+        tcg_out_opc_reg(s, cmp, ret, arg2, arg1);
+    } else {
+        tcg_out_opc_reg(s, cmp, ret, arg1, arg2);
+    }
+    if (invert) {
+        tcg_out_opc_imm(s, OPC_XORI, ret, ret, 1);
+    }
+}
+
+/*
+ * Conditional branch on a value given as two register pairs (al/ah, bl/bh).
+ * Presumably the double-word compare-and-branch needed on 32-bit hosts --
+ * TODO confirm.  Not implemented yet: any call aborts.
+ */
+static void tcg_out_brcond2(TCGContext *s, TCGCond cond, TCGReg al, TCGReg ah,
+                            TCGReg bl, TCGReg bh, TCGLabel *l)
+{
+    /* TODO: implement; until then reaching this function is a fatal error. */
+    g_assert_not_reached();
+}
+
+/*
+ * Set @ret from a comparison of two register pairs (al/ah, bl/bh).
+ * Presumably the double-word setcond needed on 32-bit hosts -- TODO confirm.
+ * Not implemented yet: any call aborts.
+ */
+static void tcg_out_setcond2(TCGContext *s, TCGCond cond, TCGReg ret,
+                             TCGReg al, TCGReg ah, TCGReg bl, TCGReg bh)
+{
+    /* TODO: implement; until then reaching this function is a fatal error. */
+    g_assert_not_reached();
+}
+
+/*
+ * Emit an unconditional pc-relative jump to @target as a single JAL whose
+ * link register is the zero register (no return address is kept).
+ *
+ * The caller must guarantee that @target is reachable by JAL: the
+ * displacement has to be even and fit in 21 signed bits (a 20-bit immediate
+ * counted in 2-byte units).  This is the same test tcg_out_call_int() uses
+ * for its short-jump case and is enforced with tcg_debug_assert().
+ */
+static inline void tcg_out_goto(TCGContext *s, tcg_insn_unit *target)
+{
+    ptrdiff_t offset = tcg_pcrel_diff(s, target);
+    tcg_debug_assert(offset == sextract64(offset, 1, 20) << 1);
+    tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, offset);
+}
+
+/*
+ * Emit an unconditional jump to @target that works at any distance.
+ *
+ * If the pc-relative displacement is even and fits in 21 signed bits, a
+ * single JAL (link register = zero register) is emitted.  Otherwise the
+ * absolute address is loaded into TCG_REG_TMP0 and the jump goes through
+ * that register with JALR, again discarding the return address.
+ *
+ * Clobbers TCG_REG_TMP0 on the long path.
+ */
+static inline void tcg_out_goto_long(TCGContext *s, tcg_insn_unit *target)
+{
+    ptrdiff_t offset = tcg_pcrel_diff(s, target);
+
+    if (offset == sextract64(offset, 1, 20) << 1) {
+        tcg_out_opc_jump(s, OPC_JAL, TCG_REG_ZERO, offset);
+    } else {
+        /* Pointer-sized load, so this is also correct on a 32-bit host. */
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, (intptr_t)target);
+        /*
+         * Indirect jump: JALR zero, tmp0, 0.  A JAL with TMP0 as the link
+         * register and displacement 0 would merely jump to itself.
+         */
+        tcg_out_opc_imm(s, OPC_JALR, TCG_REG_ZERO, TCG_REG_TMP0, 0);
+    }
+}
+
+/*
+ * Emit a call (or, if @tail is true, a tail jump) to @arg.
+ *
+ * @s:    code generation context
+ * @arg:  destination address
+ * @tail: true  -> link register is the zero register (no return address);
+ *        false -> link register is TCG_REG_RA
+ *
+ * Three encodings are tried, shortest first:
+ *   1. JAL, when the pc-relative displacement is even and fits in 21
+ *      signed bits;
+ *   2. AUIPC + JALR through TCG_REG_TMP0, patched by reloc_call(), when the
+ *      displacement fits in 32 signed bits (always taken on 32-bit hosts);
+ *   3. on 64-bit hosts, load the absolute address minus its sign-extended
+ *      low 12 bits into TCG_REG_TMP0 and let JALR add those 12 bits back.
+ *
+ * Cases 2 and 3 clobber TCG_REG_TMP0.
+ */
+static void tcg_out_call_int(TCGContext *s, tcg_insn_unit *arg, bool tail)
+{
+    TCGReg link = tail ? TCG_REG_ZERO : TCG_REG_RA;
+    ptrdiff_t offset = tcg_pcrel_diff(s, arg);
+
+    /* offset is pointer-sized: use the 64-bit extract, never truncate it. */
+    if (offset == sextract64(offset, 1, 20) << 1) {
+        /* short jump: -1048576 to +1048574, even displacements only */
+        tcg_out_opc_jump(s, OPC_JAL, link, offset);
+    } else if (TCG_TARGET_REG_BITS == 32 ||
+               offset == sextract64(offset, 1, 31) << 1) {
+        /* long jump: -2147483648 to +2147483646, even displacements only */
+        tcg_out_opc_upper(s, OPC_AUIPC, TCG_REG_TMP0, 0);
+        tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, 0);
+        /* Patch the two instructions emitted just above. */
+        reloc_call(s->code_ptr - 2, arg);
+    } else if (TCG_TARGET_REG_BITS == 64) {
+        /* far jump: 64-bit absolute address, split as base + simm12 */
+        tcg_target_long imm = sextract64((tcg_target_long)arg, 0, 12);
+        tcg_target_long base = (tcg_target_long)arg - imm;
+        tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_TMP0, base);
+        tcg_out_opc_imm(s, OPC_JALR, link, TCG_REG_TMP0, imm);
+    } else {
+        g_assert_not_reached();
+    }
+}
+
+/*
+ * Emit a tail jump to @arg: same instruction selection as a call, but
+ * tcg_out_call_int() is asked to use the zero register as link register.
+ */
+static void tcg_out_tail(TCGContext *s, tcg_insn_unit *arg)
+{
+    tcg_out_call_int(s, arg, true);
+}
+
+/*
+ * Emit a call to @arg; tcg_out_call_int() is asked to use TCG_REG_RA as
+ * link register.
+ */
+static void tcg_out_call(TCGContext *s, tcg_insn_unit *arg)
+{
+    tcg_out_call_int(s, arg, false);
+}
+
 void tb_target_set_jmp_target(uintptr_t tc_ptr, uintptr_t jmp_addr,
                               uintptr_t addr)
 {