
[RFC v3 06/13] target-i386: translate: implement qemu_ldlink and qemu_stcond ops

Message ID 1436516626-8322-7-git-send-email-a.rigo@virtualopensystems.com
State: New

Commit Message

Alvise Rigo July 10, 2015, 8:23 a.m. UTC
Implement the strex and ldrex instructions relying on TCG's qemu_ldlink and
qemu_stcond ops.  For the time being, only 32-bit configurations are supported.

Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com>
Suggested-by: Claudio Fontana <claudio.fontana@huawei.com>
Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
---
 tcg/i386/tcg-target.c | 136 ++++++++++++++++++++++++++++++++++++++++++--------
 1 file changed, 114 insertions(+), 22 deletions(-)
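
For readers unfamiliar with the TCGMemOp flag arithmetic used by the
LDEX_HELPER/STEX_HELPER macros in the patch, here is a minimal standalone
model of the lookup.  The MO_* values are stand-ins assumed purely for
illustration (the real MO_EXCL value is defined elsewhere in this series);
the point is that subtracting the single MO_EXCL bit, which the assert
guarantees is set, recovers the index used by the ordinary helper tables.

/* Standalone sketch; MO_* values are assumed, not QEMU's. */
#include <assert.h>
#include <stdio.h>

enum {
    MO_8     = 0,
    MO_16    = 1,
    MO_32    = 2,
    MO_64    = 3,
    MO_SIGN  = 4,        /* sign-extended load */
    MO_BSWAP = 8,        /* byte-swapped access */
    MO_EXCL  = 16,       /* assumed: exclusive (LL/SC) access bit */
    MO_LEUL  = MO_32,    /* little-endian unsigned 32-bit, LE host assumed */
};

static const char *const qemu_ldex_helpers[16] = {
    [MO_LEUL] = "helper_le_ldlinkul_mmu",
};

/* Since MO_EXCL is a single bit that the assert guarantees is set,
 * subtracting it is equivalent to masking it off, so the exclusive
 * memop indexes the same slot as its non-exclusive counterpart. */
static const char *ldex_helper(int mem_op)
{
    assert(mem_op & MO_EXCL);
    return qemu_ldex_helpers[mem_op - MO_EXCL];
}

int main(void)
{
    printf("%s\n", ldex_helper(MO_LEUL | MO_EXCL));
    return 0;
}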

Comments

Alex Bennée July 17, 2015, 12:56 p.m. UTC | #1
Alvise Rigo <a.rigo@virtualopensystems.com> writes:

> Implement the strex and ldrex instructions relying on TCG's qemu_ldlink and
> qemu_stcond ops.  For the time being, only 32-bit configurations are supported.
>
> Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com>
> Suggested-by: Claudio Fontana <claudio.fontana@huawei.com>
> Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com>
> ---
>  tcg/i386/tcg-target.c | 136 ++++++++++++++++++++++++++++++++++++++++++--------
>  1 file changed, 114 insertions(+), 22 deletions(-)
>
> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
> index 0d7c99c..d8250a9 100644
> --- a/tcg/i386/tcg-target.c
> +++ b/tcg/i386/tcg-target.c
> @@ -1141,6 +1141,17 @@ static void * const qemu_ld_helpers[16] = {
>      [MO_BEQ]  = helper_be_ldq_mmu,
>  };
>  
> +/* LoadLink helpers, only unsigned. Use the macro below to access them. */
> +static void * const qemu_ldex_helpers[16] = {
> +    [MO_LEUL] = helper_le_ldlinkul_mmu,
> +};
> +
> +#define LDEX_HELPER(mem_op)                                             \
> +({                                                                      \
> +    assert(mem_op & MO_EXCL);                                           \
> +    qemu_ldex_helpers[((int)mem_op - MO_EXCL)];                         \
> +})
> +
>  /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
>   *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
>   */
> @@ -1154,6 +1165,17 @@ static void * const qemu_st_helpers[16] = {
>      [MO_BEQ]  = helper_be_stq_mmu,
>  };
>  
> +/* StoreConditional helpers. Use the macro below to access them. */
> +static void * const qemu_stex_helpers[16] = {
> +    [MO_LEUL] = helper_le_stcondl_mmu,
> +};
> +
> +#define STEX_HELPER(mem_op)                                             \
> +({                                                                      \
> +    assert(mem_op & MO_EXCL);                                           \
> +    qemu_stex_helpers[(int)mem_op - MO_EXCL];                           \
> +})
> +

Same comments as for target-arm.

Do we need to be protecting backends with HAS_LDST_EXCL defines or some
such macro hackery? What currently happens if you use the new TCG ops
when the backend doesn't support them? Is supporting all backends a
prerequisite for the series?
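
To make the question concrete, here is one shape such a guard could take,
following the existing TCG_TARGET_HAS_* convention.  This is a sketch only:
neither the macro name, nor the stub, nor the fallback policy below exists in
the series as posted.

/* Hypothetical guard; all names below are assumptions. */
#include <stdio.h>
#include <stdlib.h>

/* Each backend's tcg-target.h would define this to 1 once it
 * implements the ldlink/stcond slow path. */
#ifndef TCG_TARGET_HAS_ldst_excl
#define TCG_TARGET_HAS_ldst_excl 0
#endif

static void tcg_gen_qemu_ldlink_i32(void) { /* emit the new op */ }

static void gen_load_exclusive(void)
{
    if (TCG_TARGET_HAS_ldst_excl) {
        tcg_gen_qemu_ldlink_i32();
    } else {
        /* Without a guard, the new op would presumably reach a
         * backend tcg_out_op() that has no case for it and abort at
         * translation time; a check like this would let the front
         * end fail early or pick a helper-based fallback instead. */
        fprintf(stderr, "backend lacks ldlink/stcond support\n");
        exit(1);
    }
}

int main(void)
{
    gen_load_exclusive();
    return 0;
}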

>  /* Perform the TLB load and compare.
>  
>     Inputs:
> @@ -1249,6 +1271,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
>   * for a load or store, so that we can later generate the correct helper code
>   */
>  static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
> +                                TCGReg llsc_success,
>                                  TCGReg datalo, TCGReg datahi,
>                                  TCGReg addrlo, TCGReg addrhi,
>                                  tcg_insn_unit *raddr,
> @@ -1257,6 +1280,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
>      TCGLabelQemuLdst *label = new_ldst_label(s);
>  
>      label->is_ld = is_ld;
> +    label->llsc_success = llsc_success;
>      label->oi = oi;
>      label->datalo_reg = datalo;
>      label->datahi_reg = datahi;
> @@ -1311,7 +1335,11 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
>                       (uintptr_t)l->raddr);
>      }
>  
> -    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
> +    if (opc & MO_EXCL) {
> +        tcg_out_call(s, LDEX_HELPER(opc));
> +    } else {
> +        tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]);
> +    }
>  
>      data_reg = l->datalo_reg;
>      switch (opc & MO_SSIZE) {
> @@ -1415,9 +1443,16 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
>          }
>      }
>  
> -    /* "Tail call" to the helper, with the return address back inline.  */
> -    tcg_out_push(s, retaddr);
> -    tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
> +    if (opc & MO_EXCL) {
> +        tcg_out_call(s, STEX_HELPER(opc));
> +        /* Save the output of the StoreConditional */
> +        tcg_out_mov(s, TCG_TYPE_I32, l->llsc_success, TCG_REG_EAX);
> +        tcg_out_jmp(s, l->raddr);
> +    } else {
> +        /* "Tail call" to the helper, with the return address back inline.  */
> +        tcg_out_push(s, retaddr);
> +        tcg_out_jmp(s, qemu_st_helpers[opc]);
> +    }
>  }
>  #elif defined(__x86_64__) && defined(__linux__)
>  # include <asm/prctl.h>
> @@ -1530,7 +1565,8 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
>  /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
>     EAX. It will be useful once fixed registers globals are less
>     common. */
> -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
> +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64,
> +                            bool isLoadLink)
>  {
>      TCGReg datalo, datahi, addrlo;
>      TCGReg addrhi __attribute__((unused));
> @@ -1553,14 +1589,34 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
>      mem_index = get_mmuidx(oi);
>      s_bits = opc & MO_SIZE;
>  
> -    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
> -                     label_ptr, offsetof(CPUTLBEntry, addr_read));
> +    if (isLoadLink) {
> +        TCGType t = ((TCG_TARGET_REG_BITS == 64) && (TARGET_LONG_BITS == 64)) ?
> +                                                   TCG_TYPE_I64 : TCG_TYPE_I32;
> +        /* The JMP address will be patched afterwards,
> +         * in tcg_out_qemu_ld_slow_path (two times when
> +         * TARGET_LONG_BITS > TCG_TARGET_REG_BITS). */
> +        tcg_out_mov(s, t, TCG_REG_L1, addrlo);
> +
> +        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
> +            /* Store the second part of the address. */
> +            tcg_out_mov(s, t, TCG_REG_L0, addrhi);
> +            /* We add 4 to include the jmp that follows. */
> +            label_ptr[1] = s->code_ptr + 4;
> +        }
>  
> -    /* TLB Hit.  */
> -    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
> +        tcg_out_opc(s, OPC_JMP_long, 0, 0, 0);
> +        label_ptr[0] = s->code_ptr;
> +        s->code_ptr += 4;
> +    } else {
> +        tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
> +                         label_ptr, offsetof(CPUTLBEntry, addr_read));
> +
> +        /* TLB Hit.  */
> +        tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
> +    }
>  
>      /* Record the current context of a load into ldst label */
> -    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
> +    add_qemu_ldst_label(s, true, oi, 0, datalo, datahi, addrlo, addrhi,
>                          s->code_ptr, label_ptr);
>  #else
>      {
> @@ -1663,9 +1719,10 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
>      }
>  }
>  
> -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
> +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64,
> +                            bool isStoreCond)
>  {
> -    TCGReg datalo, datahi, addrlo;
> +    TCGReg datalo, datahi, addrlo, llsc_success;
>      TCGReg addrhi __attribute__((unused));
>      TCGMemOpIdx oi;
>      TCGMemOp opc;
> @@ -1675,6 +1732,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
>      tcg_insn_unit *label_ptr[2];
>  #endif
>  
> +    /* The stcond variant has one more param */
> +    llsc_success = (isStoreCond ? *args++ : 0);
> +
>      datalo = *args++;
>      datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
>      addrlo = *args++;
> @@ -1686,15 +1746,35 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
>      mem_index = get_mmuidx(oi);
>      s_bits = opc & MO_SIZE;
>  
> -    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
> -                     label_ptr, offsetof(CPUTLBEntry, addr_write));
> +    if (isStoreCond) {
> +        TCGType t = ((TCG_TARGET_REG_BITS == 64) && (TARGET_LONG_BITS == 64)) ?
> +                                                   TCG_TYPE_I64 : TCG_TYPE_I32;
> +        /* The JMP address will be filled afterwards,
> +         * in tcg_out_qemu_ld_slow_path (two times when
> +         * TARGET_LONG_BITS > TCG_TARGET_REG_BITS). */
> +        tcg_out_mov(s, t, TCG_REG_L1, addrlo);
> +
> +        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
> +            /* Store the second part of the address. */
> +            tcg_out_mov(s, t, TCG_REG_L0, addrhi);
> +            /* We add 4 to include the jmp that follows. */
> +            label_ptr[1] = s->code_ptr + 4;
> +        }
>  
> -    /* TLB Hit.  */
> -    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
> +        tcg_out_opc(s, OPC_JMP_long, 0, 0, 0);
> +        label_ptr[0] = s->code_ptr;
> +        s->code_ptr += 4;
> +    } else {
> +        tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
> +                         label_ptr, offsetof(CPUTLBEntry, addr_write));
> +
> +        /* TLB Hit.  */
> +        tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
> +    }
>  
>      /* Record the current context of a store into ldst label */
> -    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
> -                        s->code_ptr, label_ptr);
> +    add_qemu_ldst_label(s, false, oi, llsc_success, datalo, datahi, addrlo,
> +                        addrhi, s->code_ptr, label_ptr);
>  #else
>      {
>          int32_t offset = GUEST_BASE;
> @@ -1955,16 +2035,22 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
>          break;
>  
>      case INDEX_op_qemu_ld_i32:
> -        tcg_out_qemu_ld(s, args, 0);
> +        tcg_out_qemu_ld(s, args, 0, 0);
> +        break;
> +    case INDEX_op_qemu_ldlink_i32:
> +        tcg_out_qemu_ld(s, args, 0, 1);
>          break;
>      case INDEX_op_qemu_ld_i64:
> -        tcg_out_qemu_ld(s, args, 1);
> +        tcg_out_qemu_ld(s, args, 1, 0);
>          break;
>      case INDEX_op_qemu_st_i32:
> -        tcg_out_qemu_st(s, args, 0);
> +        tcg_out_qemu_st(s, args, 0, 0);
> +        break;
> +    case INDEX_op_qemu_stcond_i32:
> +        tcg_out_qemu_st(s, args, 0, 1);
>          break;
>      case INDEX_op_qemu_st_i64:
> -        tcg_out_qemu_st(s, args, 1);
> +        tcg_out_qemu_st(s, args, 1, 0);
>          break;
>  
>      OP_32_64(mulu2):
> @@ -2186,17 +2272,23 @@ static const TCGTargetOpDef x86_op_defs[] = {
>  
>  #if TCG_TARGET_REG_BITS == 64
>      { INDEX_op_qemu_ld_i32, { "r", "L" } },
> +    { INDEX_op_qemu_ldlink_i32, { "r", "L" } },
>      { INDEX_op_qemu_st_i32, { "L", "L" } },
> +    { INDEX_op_qemu_stcond_i32, { "r", "L", "L" } },
>      { INDEX_op_qemu_ld_i64, { "r", "L" } },
>      { INDEX_op_qemu_st_i64, { "L", "L" } },
>  #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
>      { INDEX_op_qemu_ld_i32, { "r", "L" } },
> +    { INDEX_op_qemu_ldlink_i32, { "r", "L" } },
>      { INDEX_op_qemu_st_i32, { "L", "L" } },
> +    { INDEX_op_qemu_stcond_i32, { "r", "L", "L" } },
>      { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
>      { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
>  #else
>      { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
> +    { INDEX_op_qemu_ldlink_i32, { "r", "L", "L" } },
>      { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
> +    { INDEX_op_qemu_stcond_i32, { "r", "L", "L", "L" } },
>      { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
>      { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
>  #endif
Alvise Rigo July 17, 2015, 1:27 p.m. UTC | #2
On Fri, Jul 17, 2015 at 2:56 PM, Alex Bennée <alex.bennee@linaro.org> wrote:
>
> Alvise Rigo <a.rigo@virtualopensystems.com> writes:
>
>> [patch snipped]
>
> Same comments as for target-arm.
>
> Do we need to be protecting backends with HAS_LDST_EXCL defines or some
> such macro hackery? What currently happens if you use the new TCG ops
> when the backend doesn't support them? Is supporting all backends a
> prerequisite for the series?

I think the ideal approach would be to have all the backends
implement the slowpath for atomic instructions, so that the
HAS_LDST_EXCL macro would not be needed. A frontend could then
choose whether or not to rely on the slowpath.
So, ideally, yes, it's a prerequisite.

Regards,
alvise

Patch

diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c
index 0d7c99c..d8250a9 100644
--- a/tcg/i386/tcg-target.c
+++ b/tcg/i386/tcg-target.c
@@ -1141,6 +1141,17 @@  static void * const qemu_ld_helpers[16] = {
     [MO_BEQ]  = helper_be_ldq_mmu,
 };
 
+/* LoadLink helpers, only unsigned. Use the macro below to access them. */
+static void * const qemu_ldex_helpers[16] = {
+    [MO_LEUL] = helper_le_ldlinkul_mmu,
+};
+
+#define LDEX_HELPER(mem_op)                                             \
+({                                                                      \
+    assert(mem_op & MO_EXCL);                                           \
+    qemu_ldex_helpers[((int)mem_op - MO_EXCL)];                         \
+})
+
 /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr,
  *                                     uintxx_t val, int mmu_idx, uintptr_t ra)
  */
@@ -1154,6 +1165,17 @@  static void * const qemu_st_helpers[16] = {
     [MO_BEQ]  = helper_be_stq_mmu,
 };
 
+/* StoreConditional helpers. Use the macro below to access them. */
+static void * const qemu_stex_helpers[16] = {
+    [MO_LEUL] = helper_le_stcondl_mmu,
+};
+
+#define STEX_HELPER(mem_op)                                             \
+({                                                                      \
+    assert(mem_op & MO_EXCL);                                           \
+    qemu_stex_helpers[(int)mem_op - MO_EXCL];                           \
+})
+
 /* Perform the TLB load and compare.
 
    Inputs:
@@ -1249,6 +1271,7 @@  static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
  * for a load or store, so that we can later generate the correct helper code
  */
 static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
+                                TCGReg llsc_success,
                                 TCGReg datalo, TCGReg datahi,
                                 TCGReg addrlo, TCGReg addrhi,
                                 tcg_insn_unit *raddr,
@@ -1257,6 +1280,7 @@  static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi,
     TCGLabelQemuLdst *label = new_ldst_label(s);
 
     label->is_ld = is_ld;
+    label->llsc_success = llsc_success;
     label->oi = oi;
     label->datalo_reg = datalo;
     label->datahi_reg = datahi;
@@ -1311,7 +1335,11 @@  static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
                      (uintptr_t)l->raddr);
     }
 
-    tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]);
+    if (opc & MO_EXCL) {
+        tcg_out_call(s, LDEX_HELPER(opc));
+    } else {
+        tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]);
+    }
 
     data_reg = l->datalo_reg;
     switch (opc & MO_SSIZE) {
@@ -1415,9 +1443,16 @@  static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
         }
     }
 
-    /* "Tail call" to the helper, with the return address back inline.  */
-    tcg_out_push(s, retaddr);
-    tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]);
+    if (opc & MO_EXCL) {
+        tcg_out_call(s, STEX_HELPER(opc));
+        /* Save the output of the StoreConditional */
+        tcg_out_mov(s, TCG_TYPE_I32, l->llsc_success, TCG_REG_EAX);
+        tcg_out_jmp(s, l->raddr);
+    } else {
+        /* "Tail call" to the helper, with the return address back inline.  */
+        tcg_out_push(s, retaddr);
+        tcg_out_jmp(s, qemu_st_helpers[opc]);
+    }
 }
 #elif defined(__x86_64__) && defined(__linux__)
 # include <asm/prctl.h>
@@ -1530,7 +1565,8 @@  static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
 /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and
    EAX. It will be useful once fixed registers globals are less
    common. */
-static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
+static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64,
+                            bool isLoadLink)
 {
     TCGReg datalo, datahi, addrlo;
     TCGReg addrhi __attribute__((unused));
@@ -1553,14 +1589,34 @@  static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64)
     mem_index = get_mmuidx(oi);
     s_bits = opc & MO_SIZE;
 
-    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
-                     label_ptr, offsetof(CPUTLBEntry, addr_read));
+    if (isLoadLink) {
+        TCGType t = ((TCG_TARGET_REG_BITS == 64) && (TARGET_LONG_BITS == 64)) ?
+                                                   TCG_TYPE_I64 : TCG_TYPE_I32;
+        /* The JMP address will be patched afterwards,
+         * in tcg_out_qemu_ld_slow_path (two times when
+         * TARGET_LONG_BITS > TCG_TARGET_REG_BITS). */
+        tcg_out_mov(s, t, TCG_REG_L1, addrlo);
+
+        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+            /* Store the second part of the address. */
+            tcg_out_mov(s, t, TCG_REG_L0, addrhi);
+            /* We add 4 to include the jmp that follows. */
+            label_ptr[1] = s->code_ptr + 4;
+        }
 
-    /* TLB Hit.  */
-    tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
+        tcg_out_opc(s, OPC_JMP_long, 0, 0, 0);
+        label_ptr[0] = s->code_ptr;
+        s->code_ptr += 4;
+    } else {
+        tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
+                         label_ptr, offsetof(CPUTLBEntry, addr_read));
+
+        /* TLB Hit.  */
+        tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
+    }
 
     /* Record the current context of a load into ldst label */
-    add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi,
+    add_qemu_ldst_label(s, true, oi, 0, datalo, datahi, addrlo, addrhi,
                         s->code_ptr, label_ptr);
 #else
     {
@@ -1663,9 +1719,10 @@  static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi,
     }
 }
 
-static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
+static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64,
+                            bool isStoreCond)
 {
-    TCGReg datalo, datahi, addrlo;
+    TCGReg datalo, datahi, addrlo, llsc_success;
     TCGReg addrhi __attribute__((unused));
     TCGMemOpIdx oi;
     TCGMemOp opc;
@@ -1675,6 +1732,9 @@  static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
     tcg_insn_unit *label_ptr[2];
 #endif
 
+    /* The stcond variant has one more param */
+    llsc_success = (isStoreCond ? *args++ : 0);
+
     datalo = *args++;
     datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0);
     addrlo = *args++;
@@ -1686,15 +1746,35 @@  static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64)
     mem_index = get_mmuidx(oi);
     s_bits = opc & MO_SIZE;
 
-    tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
-                     label_ptr, offsetof(CPUTLBEntry, addr_write));
+    if (isStoreCond) {
+        TCGType t = ((TCG_TARGET_REG_BITS == 64) && (TARGET_LONG_BITS == 64)) ?
+                                                   TCG_TYPE_I64 : TCG_TYPE_I32;
+        /* The JMP address will be filled afterwards,
+         * in tcg_out_qemu_ld_slow_path (two times when
+         * TARGET_LONG_BITS > TCG_TARGET_REG_BITS). */
+        tcg_out_mov(s, t, TCG_REG_L1, addrlo);
+
+        if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
+            /* Store the second part of the address. */
+            tcg_out_mov(s, t, TCG_REG_L0, addrhi);
+            /* We add 4 to include the jmp that follows. */
+            label_ptr[1] = s->code_ptr + 4;
+        }
 
-    /* TLB Hit.  */
-    tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
+        tcg_out_opc(s, OPC_JMP_long, 0, 0, 0);
+        label_ptr[0] = s->code_ptr;
+        s->code_ptr += 4;
+    } else {
+        tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits,
+                         label_ptr, offsetof(CPUTLBEntry, addr_write));
+
+        /* TLB Hit.  */
+        tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc);
+    }
 
     /* Record the current context of a store into ldst label */
-    add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi,
-                        s->code_ptr, label_ptr);
+    add_qemu_ldst_label(s, false, oi, llsc_success, datalo, datahi, addrlo,
+                        addrhi, s->code_ptr, label_ptr);
 #else
     {
         int32_t offset = GUEST_BASE;
@@ -1955,16 +2035,22 @@  static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
         break;
 
     case INDEX_op_qemu_ld_i32:
-        tcg_out_qemu_ld(s, args, 0);
+        tcg_out_qemu_ld(s, args, 0, 0);
+        break;
+    case INDEX_op_qemu_ldlink_i32:
+        tcg_out_qemu_ld(s, args, 0, 1);
         break;
     case INDEX_op_qemu_ld_i64:
-        tcg_out_qemu_ld(s, args, 1);
+        tcg_out_qemu_ld(s, args, 1, 0);
         break;
     case INDEX_op_qemu_st_i32:
-        tcg_out_qemu_st(s, args, 0);
+        tcg_out_qemu_st(s, args, 0, 0);
+        break;
+    case INDEX_op_qemu_stcond_i32:
+        tcg_out_qemu_st(s, args, 0, 1);
         break;
     case INDEX_op_qemu_st_i64:
-        tcg_out_qemu_st(s, args, 1);
+        tcg_out_qemu_st(s, args, 1, 0);
         break;
 
     OP_32_64(mulu2):
@@ -2186,17 +2272,23 @@  static const TCGTargetOpDef x86_op_defs[] = {
 
 #if TCG_TARGET_REG_BITS == 64
     { INDEX_op_qemu_ld_i32, { "r", "L" } },
+    { INDEX_op_qemu_ldlink_i32, { "r", "L" } },
     { INDEX_op_qemu_st_i32, { "L", "L" } },
+    { INDEX_op_qemu_stcond_i32, { "r", "L", "L" } },
     { INDEX_op_qemu_ld_i64, { "r", "L" } },
     { INDEX_op_qemu_st_i64, { "L", "L" } },
 #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS
     { INDEX_op_qemu_ld_i32, { "r", "L" } },
+    { INDEX_op_qemu_ldlink_i32, { "r", "L" } },
     { INDEX_op_qemu_st_i32, { "L", "L" } },
+    { INDEX_op_qemu_stcond_i32, { "r", "L", "L" } },
     { INDEX_op_qemu_ld_i64, { "r", "r", "L" } },
     { INDEX_op_qemu_st_i64, { "L", "L", "L" } },
 #else
     { INDEX_op_qemu_ld_i32, { "r", "L", "L" } },
+    { INDEX_op_qemu_ldlink_i32, { "r", "L", "L" } },
     { INDEX_op_qemu_st_i32, { "L", "L", "L" } },
+    { INDEX_op_qemu_stcond_i32, { "r", "L", "L", "L" } },
     { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } },
     { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } },
 #endif
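
As an aside on the fast-path emission above: the in-patch comments note that
"the JMP address will be patched afterwards" in the slow-path emitter.  Below
is a minimal standalone model of that forward-jump patching.  The E9 rel32
encoding matches real x86 (OPC_JMP_long in the patch), but the code buffer
and function names are simplified illustrations, not QEMU's.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint8_t code[64];
static uint8_t *code_ptr = code;

/* Emit "jmp rel32" with a placeholder displacement and return the
 * address of the rel32 field -- the role played by label_ptr[0]
 * (and, for 64-bit guest addresses, label_ptr[1]) in the patch. */
static uint8_t *emit_jmp_placeholder(void)
{
    *code_ptr++ = 0xe9;            /* OPC_JMP_long */
    uint8_t *rel32_at = code_ptr;
    code_ptr += 4;                 /* leave room for the rel32 */
    return rel32_at;
}

/* Later, once the jump target is known, patch the displacement.
 * rel32 is relative to the end of the jmp instruction. */
static void patch_jmp(uint8_t *rel32_at, const uint8_t *target)
{
    int32_t rel = (int32_t)(target - (rel32_at + 4));
    memcpy(rel32_at, &rel, 4);
}

int main(void)
{
    uint8_t *label = emit_jmp_placeholder();
    code_ptr += 16;                /* pretend fast-path code follows */
    patch_jmp(label, code_ptr);    /* slow path begins here */

    int32_t rel;
    memcpy(&rel, label, 4);
    printf("patched rel32 = %d\n", rel);   /* 16 for this layout */
    return 0;
}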