Message ID | 1436516626-8322-7-git-send-email-a.rigo@virtualopensystems.com |
---|---|
State | New |
Headers | show |
Alvise Rigo <a.rigo@virtualopensystems.com> writes: > Implement strex and ldrex instruction relying on TCG's qemu_ldlink and > qemu_stcond. For the time being only 32bit configurations are supported. > > Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com> > Suggested-by: Claudio Fontana <claudio.fontana@huawei.com> > Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com> > --- > tcg/i386/tcg-target.c | 136 ++++++++++++++++++++++++++++++++++++++++++-------- > 1 file changed, 114 insertions(+), 22 deletions(-) > > diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c > index 0d7c99c..d8250a9 100644 > --- a/tcg/i386/tcg-target.c > +++ b/tcg/i386/tcg-target.c > @@ -1141,6 +1141,17 @@ static void * const qemu_ld_helpers[16] = { > [MO_BEQ] = helper_be_ldq_mmu, > }; > > +/* LoadLink helpers, only unsigned. Use the macro below to access them. */ > +static void * const qemu_ldex_helpers[16] = { > + [MO_LEUL] = helper_le_ldlinkul_mmu, > +}; > + > +#define LDEX_HELPER(mem_op) \ > +({ \ > + assert(mem_op & MO_EXCL); \ > + qemu_ldex_helpers[((int)mem_op - MO_EXCL)]; \ > +}) > + > /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, > * uintxx_t val, int mmu_idx, uintptr_t ra) > */ > @@ -1154,6 +1165,17 @@ static void * const qemu_st_helpers[16] = { > [MO_BEQ] = helper_be_stq_mmu, > }; > > +/* StoreConditional helpers. Use the macro below to access them. */ > +static void * const qemu_stex_helpers[16] = { > + [MO_LEUL] = helper_le_stcondl_mmu, > +}; > + > +#define STEX_HELPER(mem_op) \ > +({ \ > + assert(mem_op & MO_EXCL); \ > + qemu_stex_helpers[(int)mem_op - MO_EXCL]; \ > +}) > + Same comments as for target-arm. Do we need to be protecting backends with HAS_LDST_EXCL defines or some such macro hackery? What currently happens if you use the new TCG ops when the backend doesn't support them? Is supporting all backends a prerequisite for the series? > /* Perform the TLB load and compare. 
> > Inputs: > @@ -1249,6 +1271,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, > * for a load or store, so that we can later generate the correct helper code > */ > static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, > + TCGReg llsc_success, > TCGReg datalo, TCGReg datahi, > TCGReg addrlo, TCGReg addrhi, > tcg_insn_unit *raddr, > @@ -1257,6 +1280,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, > TCGLabelQemuLdst *label = new_ldst_label(s); > > label->is_ld = is_ld; > + label->llsc_success = llsc_success; > label->oi = oi; > label->datalo_reg = datalo; > label->datahi_reg = datahi; > @@ -1311,7 +1335,11 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) > (uintptr_t)l->raddr); > } > > - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); > + if (opc & MO_EXCL) { > + tcg_out_call(s, LDEX_HELPER(opc)); > + } else { > + tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]); > + } > > data_reg = l->datalo_reg; > switch (opc & MO_SSIZE) { > @@ -1415,9 +1443,16 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) > } > } > > - /* "Tail call" to the helper, with the return address back inline. */ > - tcg_out_push(s, retaddr); > - tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); > + if (opc & MO_EXCL) { > + tcg_out_call(s, STEX_HELPER(opc)); > + /* Save the output of the StoreConditional */ > + tcg_out_mov(s, TCG_TYPE_I32, l->llsc_success, TCG_REG_EAX); > + tcg_out_jmp(s, l->raddr); > + } else { > + /* "Tail call" to the helper, with the return address back inline. */ > + tcg_out_push(s, retaddr); > + tcg_out_jmp(s, qemu_st_helpers[opc]); > + } > } > #elif defined(__x86_64__) && defined(__linux__) > # include <asm/prctl.h> > @@ -1530,7 +1565,8 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, > /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and > EAX. 
It will be useful once fixed registers globals are less > common. */ > -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) > +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64, > + bool isLoadLink) > { > TCGReg datalo, datahi, addrlo; > TCGReg addrhi __attribute__((unused)); > @@ -1553,14 +1589,34 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) > mem_index = get_mmuidx(oi); > s_bits = opc & MO_SIZE; > > - tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, > - label_ptr, offsetof(CPUTLBEntry, addr_read)); > + if (isLoadLink) { > + TCGType t = ((TCG_TARGET_REG_BITS == 64) && (TARGET_LONG_BITS == 64)) ? > + TCG_TYPE_I64 : TCG_TYPE_I32; > + /* The JMP address will be patched afterwards, > + * in tcg_out_qemu_ld_slow_path (two times when > + * TARGET_LONG_BITS > TCG_TARGET_REG_BITS). */ > + tcg_out_mov(s, t, TCG_REG_L1, addrlo); > + > + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { > + /* Store the second part of the address. */ > + tcg_out_mov(s, t, TCG_REG_L0, addrhi); > + /* We add 4 to include the jmp that follows. */ > + label_ptr[1] = s->code_ptr + 4; > + } > > - /* TLB Hit. */ > - tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); > + tcg_out_opc(s, OPC_JMP_long, 0, 0, 0); > + label_ptr[0] = s->code_ptr; > + s->code_ptr += 4; > + } else { > + tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, > + label_ptr, offsetof(CPUTLBEntry, addr_read)); > + > + /* TLB Hit. 
*/ > + tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); > + } > > /* Record the current context of a load into ldst label */ > - add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, > + add_qemu_ldst_label(s, true, oi, 0, datalo, datahi, addrlo, addrhi, > s->code_ptr, label_ptr); > #else > { > @@ -1663,9 +1719,10 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, > } > } > > -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) > +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64, > + bool isStoreCond) > { > - TCGReg datalo, datahi, addrlo; > + TCGReg datalo, datahi, addrlo, llsc_success; > TCGReg addrhi __attribute__((unused)); > TCGMemOpIdx oi; > TCGMemOp opc; > @@ -1675,6 +1732,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) > tcg_insn_unit *label_ptr[2]; > #endif > > + /* The stcond variant has one more param */ > + llsc_success = (isStoreCond ? *args++ : 0); > + > datalo = *args++; > datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0); > addrlo = *args++; > @@ -1686,15 +1746,35 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) > mem_index = get_mmuidx(oi); > s_bits = opc & MO_SIZE; > > - tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, > - label_ptr, offsetof(CPUTLBEntry, addr_write)); > + if (isStoreCond) { > + TCGType t = ((TCG_TARGET_REG_BITS == 64) && (TARGET_LONG_BITS == 64)) ? > + TCG_TYPE_I64 : TCG_TYPE_I32; > + /* The JMP address will be filled afterwards, > + * in tcg_out_qemu_ld_slow_path (two times when > + * TARGET_LONG_BITS > TCG_TARGET_REG_BITS). */ > + tcg_out_mov(s, t, TCG_REG_L1, addrlo); > + > + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { > + /* Store the second part of the address. */ > + tcg_out_mov(s, t, TCG_REG_L0, addrhi); > + /* We add 4 to include the jmp that follows. */ > + label_ptr[1] = s->code_ptr + 4; > + } > > - /* TLB Hit. 
*/ > - tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); > + tcg_out_opc(s, OPC_JMP_long, 0, 0, 0); > + label_ptr[0] = s->code_ptr; > + s->code_ptr += 4; > + } else { > + tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, > + label_ptr, offsetof(CPUTLBEntry, addr_write)); > + > + /* TLB Hit. */ > + tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); > + } > > /* Record the current context of a store into ldst label */ > - add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, > - s->code_ptr, label_ptr); > + add_qemu_ldst_label(s, false, oi, llsc_success, datalo, datahi, addrlo, > + addrhi, s->code_ptr, label_ptr); > #else > { > int32_t offset = GUEST_BASE; > @@ -1955,16 +2035,22 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, > break; > > case INDEX_op_qemu_ld_i32: > - tcg_out_qemu_ld(s, args, 0); > + tcg_out_qemu_ld(s, args, 0, 0); > + break; > + case INDEX_op_qemu_ldlink_i32: > + tcg_out_qemu_ld(s, args, 0, 1); > break; > case INDEX_op_qemu_ld_i64: > - tcg_out_qemu_ld(s, args, 1); > + tcg_out_qemu_ld(s, args, 1, 0); > break; > case INDEX_op_qemu_st_i32: > - tcg_out_qemu_st(s, args, 0); > + tcg_out_qemu_st(s, args, 0, 0); > + break; > + case INDEX_op_qemu_stcond_i32: > + tcg_out_qemu_st(s, args, 0, 1); > break; > case INDEX_op_qemu_st_i64: > - tcg_out_qemu_st(s, args, 1); > + tcg_out_qemu_st(s, args, 1, 0); > break; > > OP_32_64(mulu2): > @@ -2186,17 +2272,23 @@ static const TCGTargetOpDef x86_op_defs[] = { > > #if TCG_TARGET_REG_BITS == 64 > { INDEX_op_qemu_ld_i32, { "r", "L" } }, > + { INDEX_op_qemu_ldlink_i32, { "r", "L" } }, > { INDEX_op_qemu_st_i32, { "L", "L" } }, > + { INDEX_op_qemu_stcond_i32, { "r", "L", "L" } }, > { INDEX_op_qemu_ld_i64, { "r", "L" } }, > { INDEX_op_qemu_st_i64, { "L", "L" } }, > #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS > { INDEX_op_qemu_ld_i32, { "r", "L" } }, > + { INDEX_op_qemu_ldlink_i32, { "r", "L" } }, > { INDEX_op_qemu_st_i32, { "L", "L" } }, > + { 
INDEX_op_qemu_stcond_i32, { "r", "L", "L" } }, > { INDEX_op_qemu_ld_i64, { "r", "r", "L" } }, > { INDEX_op_qemu_st_i64, { "L", "L", "L" } }, > #else > { INDEX_op_qemu_ld_i32, { "r", "L", "L" } }, > + { INDEX_op_qemu_ldlink_i32, { "r", "L", "L" } }, > { INDEX_op_qemu_st_i32, { "L", "L", "L" } }, > + { INDEX_op_qemu_stcond_i32, { "r", "L", "L", "L" } }, > { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } }, > { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } }, > #endif
On Fri, Jul 17, 2015 at 2:56 PM, Alex Bennée <alex.bennee@linaro.org> wrote: > > Alvise Rigo <a.rigo@virtualopensystems.com> writes: > >> Implement strex and ldrex instruction relying on TCG's qemu_ldlink and >> qemu_stcond. For the time being only 32bit configurations are supported. >> >> Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com> >> Suggested-by: Claudio Fontana <claudio.fontana@huawei.com> >> Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com> >> --- >> tcg/i386/tcg-target.c | 136 ++++++++++++++++++++++++++++++++++++++++++-------- >> 1 file changed, 114 insertions(+), 22 deletions(-) >> >> diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c >> index 0d7c99c..d8250a9 100644 >> --- a/tcg/i386/tcg-target.c >> +++ b/tcg/i386/tcg-target.c >> @@ -1141,6 +1141,17 @@ static void * const qemu_ld_helpers[16] = { >> [MO_BEQ] = helper_be_ldq_mmu, >> }; >> >> +/* LoadLink helpers, only unsigned. Use the macro below to access them. */ >> +static void * const qemu_ldex_helpers[16] = { >> + [MO_LEUL] = helper_le_ldlinkul_mmu, >> +}; >> + >> +#define LDEX_HELPER(mem_op) \ >> +({ \ >> + assert(mem_op & MO_EXCL); \ >> + qemu_ldex_helpers[((int)mem_op - MO_EXCL)]; \ >> +}) >> + >> /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, >> * uintxx_t val, int mmu_idx, uintptr_t ra) >> */ >> @@ -1154,6 +1165,17 @@ static void * const qemu_st_helpers[16] = { >> [MO_BEQ] = helper_be_stq_mmu, >> }; >> >> +/* StoreConditional helpers. Use the macro below to access them. */ >> +static void * const qemu_stex_helpers[16] = { >> + [MO_LEUL] = helper_le_stcondl_mmu, >> +}; >> + >> +#define STEX_HELPER(mem_op) \ >> +({ \ >> + assert(mem_op & MO_EXCL); \ >> + qemu_stex_helpers[(int)mem_op - MO_EXCL]; \ >> +}) >> + > > Same comments as for target-arm. > > Do we need to be protecting backends with HAS_LDST_EXCL defines or some > such macro hackery? What currently happens if you use the new TCG ops > when the backend doesn't support them? 
Is supporting all backends a > prerequisite for the series? I think that the ideal approach would be to have all the backends implementing the slowpath for atomic instructions so that the HAS_LDST_EXCL macro will not be needed. Then a frontend can rely on the slowpath or not. So, ideally, it's a prerequisite. Regards, alvise > >> /* Perform the TLB load and compare. >> >> Inputs: >> @@ -1249,6 +1271,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, >> * for a load or store, so that we can later generate the correct helper code >> */ >> static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, >> + TCGReg llsc_success, >> TCGReg datalo, TCGReg datahi, >> TCGReg addrlo, TCGReg addrhi, >> tcg_insn_unit *raddr, >> @@ -1257,6 +1280,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, >> TCGLabelQemuLdst *label = new_ldst_label(s); >> >> label->is_ld = is_ld; >> + label->llsc_success = llsc_success; >> label->oi = oi; >> label->datalo_reg = datalo; >> label->datahi_reg = datahi; >> @@ -1311,7 +1335,11 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) >> (uintptr_t)l->raddr); >> } >> >> - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); >> + if (opc & MO_EXCL) { >> + tcg_out_call(s, LDEX_HELPER(opc)); >> + } else { >> + tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]); >> + } >> >> data_reg = l->datalo_reg; >> switch (opc & MO_SSIZE) { >> @@ -1415,9 +1443,16 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) >> } >> } >> >> - /* "Tail call" to the helper, with the return address back inline. 
*/ >> - tcg_out_push(s, retaddr); >> - tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); >> + if (opc & MO_EXCL) { >> + tcg_out_call(s, STEX_HELPER(opc)); >> + /* Save the output of the StoreConditional */ >> + tcg_out_mov(s, TCG_TYPE_I32, l->llsc_success, TCG_REG_EAX); >> + tcg_out_jmp(s, l->raddr); >> + } else { >> + /* "Tail call" to the helper, with the return address back inline. */ >> + tcg_out_push(s, retaddr); >> + tcg_out_jmp(s, qemu_st_helpers[opc]); >> + } >> } >> #elif defined(__x86_64__) && defined(__linux__) >> # include <asm/prctl.h> >> @@ -1530,7 +1565,8 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, >> /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and >> EAX. It will be useful once fixed registers globals are less >> common. */ >> -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) >> +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64, >> + bool isLoadLink) >> { >> TCGReg datalo, datahi, addrlo; >> TCGReg addrhi __attribute__((unused)); >> @@ -1553,14 +1589,34 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) >> mem_index = get_mmuidx(oi); >> s_bits = opc & MO_SIZE; >> >> - tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, >> - label_ptr, offsetof(CPUTLBEntry, addr_read)); >> + if (isLoadLink) { >> + TCGType t = ((TCG_TARGET_REG_BITS == 64) && (TARGET_LONG_BITS == 64)) ? >> + TCG_TYPE_I64 : TCG_TYPE_I32; >> + /* The JMP address will be patched afterwards, >> + * in tcg_out_qemu_ld_slow_path (two times when >> + * TARGET_LONG_BITS > TCG_TARGET_REG_BITS). */ >> + tcg_out_mov(s, t, TCG_REG_L1, addrlo); >> + >> + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { >> + /* Store the second part of the address. */ >> + tcg_out_mov(s, t, TCG_REG_L0, addrhi); >> + /* We add 4 to include the jmp that follows. */ >> + label_ptr[1] = s->code_ptr + 4; >> + } >> >> - /* TLB Hit. 
*/ >> - tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); >> + tcg_out_opc(s, OPC_JMP_long, 0, 0, 0); >> + label_ptr[0] = s->code_ptr; >> + s->code_ptr += 4; >> + } else { >> + tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, >> + label_ptr, offsetof(CPUTLBEntry, addr_read)); >> + >> + /* TLB Hit. */ >> + tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); >> + } >> >> /* Record the current context of a load into ldst label */ >> - add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, >> + add_qemu_ldst_label(s, true, oi, 0, datalo, datahi, addrlo, addrhi, >> s->code_ptr, label_ptr); >> #else >> { >> @@ -1663,9 +1719,10 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, >> } >> } >> >> -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) >> +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64, >> + bool isStoreCond) >> { >> - TCGReg datalo, datahi, addrlo; >> + TCGReg datalo, datahi, addrlo, llsc_success; >> TCGReg addrhi __attribute__((unused)); >> TCGMemOpIdx oi; >> TCGMemOp opc; >> @@ -1675,6 +1732,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) >> tcg_insn_unit *label_ptr[2]; >> #endif >> >> + /* The stcond variant has one more param */ >> + llsc_success = (isStoreCond ? *args++ : 0); >> + >> datalo = *args++; >> datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0); >> addrlo = *args++; >> @@ -1686,15 +1746,35 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) >> mem_index = get_mmuidx(oi); >> s_bits = opc & MO_SIZE; >> >> - tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, >> - label_ptr, offsetof(CPUTLBEntry, addr_write)); >> + if (isStoreCond) { >> + TCGType t = ((TCG_TARGET_REG_BITS == 64) && (TARGET_LONG_BITS == 64)) ? 
>> + TCG_TYPE_I64 : TCG_TYPE_I32; >> + /* The JMP address will be filled afterwards, >> + * in tcg_out_qemu_ld_slow_path (two times when >> + * TARGET_LONG_BITS > TCG_TARGET_REG_BITS). */ >> + tcg_out_mov(s, t, TCG_REG_L1, addrlo); >> + >> + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { >> + /* Store the second part of the address. */ >> + tcg_out_mov(s, t, TCG_REG_L0, addrhi); >> + /* We add 4 to include the jmp that follows. */ >> + label_ptr[1] = s->code_ptr + 4; >> + } >> >> - /* TLB Hit. */ >> - tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); >> + tcg_out_opc(s, OPC_JMP_long, 0, 0, 0); >> + label_ptr[0] = s->code_ptr; >> + s->code_ptr += 4; >> + } else { >> + tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, >> + label_ptr, offsetof(CPUTLBEntry, addr_write)); >> + >> + /* TLB Hit. */ >> + tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); >> + } >> >> /* Record the current context of a store into ldst label */ >> - add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, >> - s->code_ptr, label_ptr); >> + add_qemu_ldst_label(s, false, oi, llsc_success, datalo, datahi, addrlo, >> + addrhi, s->code_ptr, label_ptr); >> #else >> { >> int32_t offset = GUEST_BASE; >> @@ -1955,16 +2035,22 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, >> break; >> >> case INDEX_op_qemu_ld_i32: >> - tcg_out_qemu_ld(s, args, 0); >> + tcg_out_qemu_ld(s, args, 0, 0); >> + break; >> + case INDEX_op_qemu_ldlink_i32: >> + tcg_out_qemu_ld(s, args, 0, 1); >> break; >> case INDEX_op_qemu_ld_i64: >> - tcg_out_qemu_ld(s, args, 1); >> + tcg_out_qemu_ld(s, args, 1, 0); >> break; >> case INDEX_op_qemu_st_i32: >> - tcg_out_qemu_st(s, args, 0); >> + tcg_out_qemu_st(s, args, 0, 0); >> + break; >> + case INDEX_op_qemu_stcond_i32: >> + tcg_out_qemu_st(s, args, 0, 1); >> break; >> case INDEX_op_qemu_st_i64: >> - tcg_out_qemu_st(s, args, 1); >> + tcg_out_qemu_st(s, args, 1, 0); >> break; >> >> OP_32_64(mulu2): >> @@ -2186,17 +2272,23 @@ static 
const TCGTargetOpDef x86_op_defs[] = { >> >> #if TCG_TARGET_REG_BITS == 64 >> { INDEX_op_qemu_ld_i32, { "r", "L" } }, >> + { INDEX_op_qemu_ldlink_i32, { "r", "L" } }, >> { INDEX_op_qemu_st_i32, { "L", "L" } }, >> + { INDEX_op_qemu_stcond_i32, { "r", "L", "L" } }, >> { INDEX_op_qemu_ld_i64, { "r", "L" } }, >> { INDEX_op_qemu_st_i64, { "L", "L" } }, >> #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS >> { INDEX_op_qemu_ld_i32, { "r", "L" } }, >> + { INDEX_op_qemu_ldlink_i32, { "r", "L" } }, >> { INDEX_op_qemu_st_i32, { "L", "L" } }, >> + { INDEX_op_qemu_stcond_i32, { "r", "L", "L" } }, >> { INDEX_op_qemu_ld_i64, { "r", "r", "L" } }, >> { INDEX_op_qemu_st_i64, { "L", "L", "L" } }, >> #else >> { INDEX_op_qemu_ld_i32, { "r", "L", "L" } }, >> + { INDEX_op_qemu_ldlink_i32, { "r", "L", "L" } }, >> { INDEX_op_qemu_st_i32, { "L", "L", "L" } }, >> + { INDEX_op_qemu_stcond_i32, { "r", "L", "L", "L" } }, >> { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } }, >> { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } }, >> #endif > > -- > Alex Bennée
diff --git a/tcg/i386/tcg-target.c b/tcg/i386/tcg-target.c index 0d7c99c..d8250a9 100644 --- a/tcg/i386/tcg-target.c +++ b/tcg/i386/tcg-target.c @@ -1141,6 +1141,17 @@ static void * const qemu_ld_helpers[16] = { [MO_BEQ] = helper_be_ldq_mmu, }; +/* LoadLink helpers, only unsigned. Use the macro below to access them. */ +static void * const qemu_ldex_helpers[16] = { + [MO_LEUL] = helper_le_ldlinkul_mmu, +}; + +#define LDEX_HELPER(mem_op) \ +({ \ + assert(mem_op & MO_EXCL); \ + qemu_ldex_helpers[((int)mem_op - MO_EXCL)]; \ +}) + /* helper signature: helper_ret_st_mmu(CPUState *env, target_ulong addr, * uintxx_t val, int mmu_idx, uintptr_t ra) */ @@ -1154,6 +1165,17 @@ static void * const qemu_st_helpers[16] = { [MO_BEQ] = helper_be_stq_mmu, }; +/* StoreConditional helpers. Use the macro below to access them. */ +static void * const qemu_stex_helpers[16] = { + [MO_LEUL] = helper_le_stcondl_mmu, +}; + +#define STEX_HELPER(mem_op) \ +({ \ + assert(mem_op & MO_EXCL); \ + qemu_stex_helpers[(int)mem_op - MO_EXCL]; \ +}) + /* Perform the TLB load and compare. 
Inputs: @@ -1249,6 +1271,7 @@ static inline void tcg_out_tlb_load(TCGContext *s, TCGReg addrlo, TCGReg addrhi, * for a load or store, so that we can later generate the correct helper code */ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, + TCGReg llsc_success, TCGReg datalo, TCGReg datahi, TCGReg addrlo, TCGReg addrhi, tcg_insn_unit *raddr, @@ -1257,6 +1280,7 @@ static void add_qemu_ldst_label(TCGContext *s, bool is_ld, TCGMemOpIdx oi, TCGLabelQemuLdst *label = new_ldst_label(s); label->is_ld = is_ld; + label->llsc_success = llsc_success; label->oi = oi; label->datalo_reg = datalo; label->datahi_reg = datahi; @@ -1311,7 +1335,11 @@ static void tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) (uintptr_t)l->raddr); } - tcg_out_call(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SIZE)]); + if (opc & MO_EXCL) { + tcg_out_call(s, LDEX_HELPER(opc)); + } else { + tcg_out_call(s, qemu_ld_helpers[opc & ~MO_SIGN]); + } data_reg = l->datalo_reg; switch (opc & MO_SSIZE) { @@ -1415,9 +1443,16 @@ static void tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } } - /* "Tail call" to the helper, with the return address back inline. */ - tcg_out_push(s, retaddr); - tcg_out_jmp(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)]); + if (opc & MO_EXCL) { + tcg_out_call(s, STEX_HELPER(opc)); + /* Save the output of the StoreConditional */ + tcg_out_mov(s, TCG_TYPE_I32, l->llsc_success, TCG_REG_EAX); + tcg_out_jmp(s, l->raddr); + } else { + /* "Tail call" to the helper, with the return address back inline. */ + tcg_out_push(s, retaddr); + tcg_out_jmp(s, qemu_st_helpers[opc]); + } } #elif defined(__x86_64__) && defined(__linux__) # include <asm/prctl.h> @@ -1530,7 +1565,8 @@ static void tcg_out_qemu_ld_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, /* XXX: qemu_ld and qemu_st could be modified to clobber only EDX and EAX. It will be useful once fixed registers globals are less common. 
*/ -static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) +static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64, + bool isLoadLink) { TCGReg datalo, datahi, addrlo; TCGReg addrhi __attribute__((unused)); @@ -1553,14 +1589,34 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, bool is64) mem_index = get_mmuidx(oi); s_bits = opc & MO_SIZE; - tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, - label_ptr, offsetof(CPUTLBEntry, addr_read)); + if (isLoadLink) { + TCGType t = ((TCG_TARGET_REG_BITS == 64) && (TARGET_LONG_BITS == 64)) ? + TCG_TYPE_I64 : TCG_TYPE_I32; + /* The JMP address will be patched afterwards, + * in tcg_out_qemu_ld_slow_path (two times when + * TARGET_LONG_BITS > TCG_TARGET_REG_BITS). */ + tcg_out_mov(s, t, TCG_REG_L1, addrlo); + + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + /* Store the second part of the address. */ + tcg_out_mov(s, t, TCG_REG_L0, addrhi); + /* We add 4 to include the jmp that follows. */ + label_ptr[1] = s->code_ptr + 4; + } - /* TLB Hit. */ - tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); + tcg_out_opc(s, OPC_JMP_long, 0, 0, 0); + label_ptr[0] = s->code_ptr; + s->code_ptr += 4; + } else { + tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, + label_ptr, offsetof(CPUTLBEntry, addr_read)); + + /* TLB Hit. 
*/ + tcg_out_qemu_ld_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); + } /* Record the current context of a load into ldst label */ - add_qemu_ldst_label(s, true, oi, datalo, datahi, addrlo, addrhi, + add_qemu_ldst_label(s, true, oi, 0, datalo, datahi, addrlo, addrhi, s->code_ptr, label_ptr); #else { @@ -1663,9 +1719,10 @@ static void tcg_out_qemu_st_direct(TCGContext *s, TCGReg datalo, TCGReg datahi, } } -static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) +static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64, + bool isStoreCond) { - TCGReg datalo, datahi, addrlo; + TCGReg datalo, datahi, addrlo, llsc_success; TCGReg addrhi __attribute__((unused)); TCGMemOpIdx oi; TCGMemOp opc; @@ -1675,6 +1732,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) tcg_insn_unit *label_ptr[2]; #endif + /* The stcond variant has one more param */ + llsc_success = (isStoreCond ? *args++ : 0); + datalo = *args++; datahi = (TCG_TARGET_REG_BITS == 32 && is64 ? *args++ : 0); addrlo = *args++; @@ -1686,15 +1746,35 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, bool is64) mem_index = get_mmuidx(oi); s_bits = opc & MO_SIZE; - tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, - label_ptr, offsetof(CPUTLBEntry, addr_write)); + if (isStoreCond) { + TCGType t = ((TCG_TARGET_REG_BITS == 64) && (TARGET_LONG_BITS == 64)) ? + TCG_TYPE_I64 : TCG_TYPE_I32; + /* The JMP address will be filled afterwards, + * in tcg_out_qemu_ld_slow_path (two times when + * TARGET_LONG_BITS > TCG_TARGET_REG_BITS). */ + tcg_out_mov(s, t, TCG_REG_L1, addrlo); + + if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) { + /* Store the second part of the address. */ + tcg_out_mov(s, t, TCG_REG_L0, addrhi); + /* We add 4 to include the jmp that follows. */ + label_ptr[1] = s->code_ptr + 4; + } - /* TLB Hit. 
*/ - tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); + tcg_out_opc(s, OPC_JMP_long, 0, 0, 0); + label_ptr[0] = s->code_ptr; + s->code_ptr += 4; + } else { + tcg_out_tlb_load(s, addrlo, addrhi, mem_index, s_bits, + label_ptr, offsetof(CPUTLBEntry, addr_write)); + + /* TLB Hit. */ + tcg_out_qemu_st_direct(s, datalo, datahi, TCG_REG_L1, 0, 0, opc); + } /* Record the current context of a store into ldst label */ - add_qemu_ldst_label(s, false, oi, datalo, datahi, addrlo, addrhi, - s->code_ptr, label_ptr); + add_qemu_ldst_label(s, false, oi, llsc_success, datalo, datahi, addrlo, + addrhi, s->code_ptr, label_ptr); #else { int32_t offset = GUEST_BASE; @@ -1955,16 +2035,22 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc, break; case INDEX_op_qemu_ld_i32: - tcg_out_qemu_ld(s, args, 0); + tcg_out_qemu_ld(s, args, 0, 0); + break; + case INDEX_op_qemu_ldlink_i32: + tcg_out_qemu_ld(s, args, 0, 1); break; case INDEX_op_qemu_ld_i64: - tcg_out_qemu_ld(s, args, 1); + tcg_out_qemu_ld(s, args, 1, 0); break; case INDEX_op_qemu_st_i32: - tcg_out_qemu_st(s, args, 0); + tcg_out_qemu_st(s, args, 0, 0); + break; + case INDEX_op_qemu_stcond_i32: + tcg_out_qemu_st(s, args, 0, 1); break; case INDEX_op_qemu_st_i64: - tcg_out_qemu_st(s, args, 1); + tcg_out_qemu_st(s, args, 1, 0); break; OP_32_64(mulu2): @@ -2186,17 +2272,23 @@ static const TCGTargetOpDef x86_op_defs[] = { #if TCG_TARGET_REG_BITS == 64 { INDEX_op_qemu_ld_i32, { "r", "L" } }, + { INDEX_op_qemu_ldlink_i32, { "r", "L" } }, { INDEX_op_qemu_st_i32, { "L", "L" } }, + { INDEX_op_qemu_stcond_i32, { "r", "L", "L" } }, { INDEX_op_qemu_ld_i64, { "r", "L" } }, { INDEX_op_qemu_st_i64, { "L", "L" } }, #elif TARGET_LONG_BITS <= TCG_TARGET_REG_BITS { INDEX_op_qemu_ld_i32, { "r", "L" } }, + { INDEX_op_qemu_ldlink_i32, { "r", "L" } }, { INDEX_op_qemu_st_i32, { "L", "L" } }, + { INDEX_op_qemu_stcond_i32, { "r", "L", "L" } }, { INDEX_op_qemu_ld_i64, { "r", "r", "L" } }, { INDEX_op_qemu_st_i64, { "L", "L", "L" } }, 
#else { INDEX_op_qemu_ld_i32, { "r", "L", "L" } }, + { INDEX_op_qemu_ldlink_i32, { "r", "L", "L" } }, { INDEX_op_qemu_st_i32, { "L", "L", "L" } }, + { INDEX_op_qemu_stcond_i32, { "r", "L", "L", "L" } }, { INDEX_op_qemu_ld_i64, { "r", "r", "L", "L" } }, { INDEX_op_qemu_st_i64, { "L", "L", "L", "L" } }, #endif
Implement strex and ldrex instructions relying on TCG's qemu_ldlink and qemu_stcond. For the time being, only 32-bit configurations are supported. Suggested-by: Jani Kokkonen <jani.kokkonen@huawei.com> Suggested-by: Claudio Fontana <claudio.fontana@huawei.com> Signed-off-by: Alvise Rigo <a.rigo@virtualopensystems.com> --- tcg/i386/tcg-target.c | 136 ++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 114 insertions(+), 22 deletions(-)