Message ID | 1472935202-3342-35-git-send-email-rth@twiddle.net |
---|---|
State | New |
Headers | show |
Richard Henderson <rth@twiddle.net> writes: > Emulating LL/SC with cmpxchg is not correct, since it can > suffer from the ABA problem. However, portable parallel > code is writting assuming only cmpxchg which means that in > practice this is a viable alternative. > > Signed-off-by: Richard Henderson <rth@twiddle.net> > --- > linux-user/main.c | 49 ---------------------- > target-alpha/cpu.h | 4 -- > target-alpha/helper.c | 6 --- > target-alpha/machine.c | 2 - > target-alpha/translate.c | 104 ++++++++++++++++++++--------------------------- > 5 files changed, 45 insertions(+), 120 deletions(-) > > diff --git a/linux-user/main.c b/linux-user/main.c > index 64838bf..6cdd0da 100644 > --- a/linux-user/main.c > +++ b/linux-user/main.c > @@ -2994,51 +2994,6 @@ void cpu_loop(CPUM68KState *env) > #endif /* TARGET_M68K */ > > #ifdef TARGET_ALPHA > -static void do_store_exclusive(CPUAlphaState *env, int reg, int quad) > -{ > - target_ulong addr, val, tmp; > - target_siginfo_t info; > - int ret = 0; > - > - addr = env->lock_addr; > - tmp = env->lock_st_addr; > - env->lock_addr = -1; > - env->lock_st_addr = 0; > - > - start_exclusive(); > - mmap_lock(); > - > - if (addr == tmp) { > - if (quad ? get_user_s64(val, addr) : get_user_s32(val, addr)) { > - goto do_sigsegv; > - } > - > - if (val == env->lock_value) { > - tmp = env->ir[reg]; > - if (quad ? put_user_u64(tmp, addr) : put_user_u32(tmp, addr)) { > - goto do_sigsegv; > - } > - ret = 1; > - } > - } > - env->ir[reg] = ret; > - env->pc += 4; > - > - mmap_unlock(); > - end_exclusive(); > - return; > - > - do_sigsegv: > - mmap_unlock(); > - end_exclusive(); > - > - info.si_signo = TARGET_SIGSEGV; > - info.si_errno = 0; > - info.si_code = TARGET_SEGV_MAPERR; > - info._sifields._sigfault._addr = addr; > - queue_signal(env, TARGET_SIGSEGV, &info); > -} > - > void cpu_loop(CPUAlphaState *env) > { > CPUState *cs = CPU(alpha_env_get_cpu(env)); > @@ -3212,10 +3167,6 @@ void cpu_loop(CPUAlphaState *env) > queue_signal(env, info.si_signo, &info); > } > break; > - case EXCP_STL_C: > - case EXCP_STQ_C: > - do_store_exclusive(env, env->error_code, trapnr - EXCP_STL_C); > - break; > case EXCP_INTERRUPT: > /* Just indicate that signals should be handled asap. */ > break; > diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h > index 9d9489c..a4068c8 100644 > --- a/target-alpha/cpu.h > +++ b/target-alpha/cpu.h > @@ -230,7 +230,6 @@ struct CPUAlphaState { > uint64_t pc; > uint64_t unique; > uint64_t lock_addr; > - uint64_t lock_st_addr; > uint64_t lock_value; > > /* The FPCR, and disassembled portions thereof. */ > @@ -346,9 +345,6 @@ enum { > EXCP_ARITH, > EXCP_FEN, > EXCP_CALL_PAL, > - /* For Usermode emulation. */ > - EXCP_STL_C, > - EXCP_STQ_C, > }; > > /* Alpha-specific interrupt pending bits. */ > diff --git a/target-alpha/helper.c b/target-alpha/helper.c > index 1ed0725..95453f5 100644 > --- a/target-alpha/helper.c > +++ b/target-alpha/helper.c > @@ -306,12 +306,6 @@ void alpha_cpu_do_interrupt(CPUState *cs) > case EXCP_CALL_PAL: > name = "call_pal"; > break; > - case EXCP_STL_C: > - name = "stl_c"; > - break; > - case EXCP_STQ_C: > - name = "stq_c"; > - break; > } > qemu_log("INT %6d: %s(%#x) pc=%016" PRIx64 " sp=%016" PRIx64 "\n", > ++count, name, env->error_code, env->pc, env->ir[IR_SP]); > diff --git a/target-alpha/machine.c b/target-alpha/machine.c > index 710b783..b99a123 100644 > --- a/target-alpha/machine.c > +++ b/target-alpha/machine.c > @@ -45,8 +45,6 @@ static VMStateField vmstate_env_fields[] = { > VMSTATE_UINTTL(unique, CPUAlphaState), > VMSTATE_UINTTL(lock_addr, CPUAlphaState), > VMSTATE_UINTTL(lock_value, CPUAlphaState), > - /* Note that lock_st_addr is not saved; it is a temporary > - used during the execution of the st[lq]_c insns. */ > > VMSTATE_UINT8(ps, CPUAlphaState), > VMSTATE_UINT8(intr_flag, CPUAlphaState), > diff --git a/target-alpha/translate.c b/target-alpha/translate.c > index 2941159..ed353de 100644 > --- a/target-alpha/translate.c > +++ b/target-alpha/translate.c > @@ -99,7 +99,6 @@ static TCGv cpu_std_ir[31]; > static TCGv cpu_fir[31]; > static TCGv cpu_pc; > static TCGv cpu_lock_addr; > -static TCGv cpu_lock_st_addr; > static TCGv cpu_lock_value; > > #ifndef CONFIG_USER_ONLY > @@ -116,7 +115,6 @@ void alpha_translate_init(void) > static const GlobalVar vars[] = { > DEF_VAR(pc), > DEF_VAR(lock_addr), > - DEF_VAR(lock_st_addr), > DEF_VAR(lock_value), > }; > > @@ -198,6 +196,23 @@ static TCGv dest_sink(DisasContext *ctx) > return ctx->sink; > } > > +static void free_context_temps(DisasContext *ctx) > +{ > + if (!TCGV_IS_UNUSED_I64(ctx->sink)) { > + tcg_gen_discard_i64(ctx->sink); > + tcg_temp_free(ctx->sink); > + TCGV_UNUSED_I64(ctx->sink); > + } > + if (!TCGV_IS_UNUSED_I64(ctx->zero)) { > + tcg_temp_free(ctx->zero); > + TCGV_UNUSED_I64(ctx->zero); > + } > + if (!TCGV_IS_UNUSED_I64(ctx->lit)) { > + tcg_temp_free(ctx->lit); > + TCGV_UNUSED_I64(ctx->lit); > + } > +} > + > static TCGv load_gpr(DisasContext *ctx, unsigned reg) > { > if (likely(reg < 31)) { > @@ -395,56 +410,37 @@ static ExitStatus gen_store_conditional(DisasContext *ctx, int ra, int rb, > int32_t disp16, int mem_idx, > TCGMemOp op) > { > - TCGv addr; > - > - if (ra == 31) { > - /* ??? Don't bother storing anything. The user can't tell > - the difference, since the zero register always reads zero. */ > - return NO_EXIT; > - } > - > -#if defined(CONFIG_USER_ONLY) > - addr = cpu_lock_st_addr; > -#else > - addr = tcg_temp_local_new(); > -#endif > + TCGLabel *lab_fail, *lab_done; > + TCGv addr, val; > > + addr = tcg_temp_new_i64(); > tcg_gen_addi_i64(addr, load_gpr(ctx, rb), disp16); > + free_context_temps(ctx); > > -#if defined(CONFIG_USER_ONLY) > - /* ??? This is handled via a complicated version of compare-and-swap > - in the cpu_loop. Hopefully one day we'll have a real CAS opcode > - in TCG so that this isn't necessary. */ > - return gen_excp(ctx, (op & MO_SIZE) == MO_64 ? EXCP_STQ_C : EXCP_STL_C, ra); > -#else > - /* ??? In system mode we are never multi-threaded, so CAS can be > - implemented via a non-atomic load-compare-store sequence. */ > - { > - TCGLabel *lab_fail, *lab_done; > - TCGv val; > + lab_fail = gen_new_label(); > + lab_done = gen_new_label(); > + tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_lock_addr, lab_fail); > + tcg_temp_free_i64(addr); > > - lab_fail = gen_new_label(); > - lab_done = gen_new_label(); > - tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_lock_addr, lab_fail); > + val = tcg_temp_new_i64(); > + tcg_gen_atomic_cmpxchg_i64(val, cpu_lock_addr, cpu_lock_value, > + load_gpr(ctx, ra), mem_idx, op); > + free_context_temps(ctx); I don't quite follow what free_context_temps() is doing and why it needs to be done twice during the building of this instructions TCGOps? > > - val = tcg_temp_new(); > - tcg_gen_qemu_ld_i64(val, addr, mem_idx, op); > - tcg_gen_brcond_i64(TCG_COND_NE, val, cpu_lock_value, lab_fail); > - > - tcg_gen_qemu_st_i64(ctx->ir[ra], addr, mem_idx, op); > - tcg_gen_movi_i64(ctx->ir[ra], 1); > - tcg_gen_br(lab_done); > + if (ra != 31) { > + tcg_gen_setcond_i64(TCG_COND_EQ, ctx->ir[ra], val, cpu_lock_value); > + } > + tcg_temp_free_i64(val); > + tcg_gen_br(lab_done); > > - gen_set_label(lab_fail); > + gen_set_label(lab_fail); > + if (ra != 31) { > tcg_gen_movi_i64(ctx->ir[ra], 0); > - > - gen_set_label(lab_done); > - tcg_gen_movi_i64(cpu_lock_addr, -1); > - > - tcg_temp_free(addr); > - return NO_EXIT; > } > -#endif > + > + gen_set_label(lab_done); > + tcg_gen_movi_i64(cpu_lock_addr, -1); > + return NO_EXIT; > } > > static bool in_superpage(DisasContext *ctx, int64_t addr) > @@ -2914,6 +2910,10 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb) > /* Similarly for flush-to-zero. */ > ctx.tb_ftz = -1; > > + TCGV_UNUSED_I64(ctx.zero); > + TCGV_UNUSED_I64(ctx.sink); > + TCGV_UNUSED_I64(ctx.lit); > + > num_insns = 0; > max_insns = tb->cflags & CF_COUNT_MASK; > if (max_insns == 0) { > @@ -2948,23 +2948,9 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb) > } > insn = cpu_ldl_code(env, ctx.pc); > > - TCGV_UNUSED_I64(ctx.zero); > - TCGV_UNUSED_I64(ctx.sink); > - TCGV_UNUSED_I64(ctx.lit); > - > ctx.pc += 4; > ret = translate_one(ctxp, insn); > - > - if (!TCGV_IS_UNUSED_I64(ctx.sink)) { > - tcg_gen_discard_i64(ctx.sink); > - tcg_temp_free(ctx.sink); > - } > - if (!TCGV_IS_UNUSED_I64(ctx.zero)) { > - tcg_temp_free(ctx.zero); > - } > - if (!TCGV_IS_UNUSED_I64(ctx.lit)) { > - tcg_temp_free(ctx.lit); > - } > + free_context_temps(ctxp); > > /* If we reach a page boundary, are single stepping, > or exhaust instruction count, stop generation. */ -- Alex Bennée
On 09/15/2016 07:38 AM, Alex Bennée wrote: >> + lab_fail = gen_new_label(); >> > + lab_done = gen_new_label(); >> > + tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_lock_addr, lab_fail); >> > + tcg_temp_free_i64(addr); >> > >> > - lab_fail = gen_new_label(); >> > - lab_done = gen_new_label(); >> > - tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_lock_addr, lab_fail); >> > + val = tcg_temp_new_i64(); >> > + tcg_gen_atomic_cmpxchg_i64(val, cpu_lock_addr, cpu_lock_value, >> > + load_gpr(ctx, ra), mem_idx, op); >> > + free_context_temps(ctx); > I don't quite follow what free_context_temps() is doing and why it needs > to be done twice during the building of this instructions TCGOps? > It's releasing the zero, sink, imm TCGv temporaries from DisasContext, as they're going out of scope within each basic block. Since they're freed, they'll be re-created as necessary in the next basic block. The two (unlikely) relevant events are: (1) address register = zero register, so the zero temp becomes invalid, and (2) dest register = zero register, so the zero/sink temps become invalid. It's the kind of thing that really shouldn't ever happen for non-malicious code, but you don't want qemu to abort when invalid temps are used. Perhaps a comment is warranted here... r~
Richard Henderson <rth@twiddle.net> writes: > On 09/15/2016 07:38 AM, Alex Bennée wrote: >>> + lab_fail = gen_new_label(); >>> > + lab_done = gen_new_label(); >>> > + tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_lock_addr, lab_fail); >>> > + tcg_temp_free_i64(addr); >>> > >>> > - lab_fail = gen_new_label(); >>> > - lab_done = gen_new_label(); >>> > - tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_lock_addr, lab_fail); >>> > + val = tcg_temp_new_i64(); >>> > + tcg_gen_atomic_cmpxchg_i64(val, cpu_lock_addr, cpu_lock_value, >>> > + load_gpr(ctx, ra), mem_idx, op); >>> > + free_context_temps(ctx); >> I don't quite follow what free_context_temps() is doing and why it needs >> to be done twice during the building of this instructions TCGOps? >> > > It's releasing the zero, sink, imm TCGv temporaries from DisasContext, as > they're going out of scope within each basic block. Since they're freed, > they'll be re-created as necessary in the next basic block. > > The two (unlikely) relevant events are: (1) address register = zero register, > so the zero temp becomes invalid, and (2) dest register = zero register, so the > zero/sink temps become invalid. Ahh ok. The ARM code just allocates temps on demand including for its zero register which I guess means multiple ones could be assigned. There is also temp allocation logic to free them at the end of the block. I wonder if this is something that should be more of a feature of the core TCG code because let's face it temp tracking is a pain. > > It's the kind of thing that really shouldn't ever happen for non-malicious > code, but you don't want qemu to abort when invalid temps are used. Perhaps a > comment is warranted here... Does the code generator suffer if temps aren't freed as soon as possible? Is there a reason why we don't just do a general free all temps at the end of the block? -- Alex Bennée
On 09/15/2016 10:48 AM, Alex Bennée wrote: > Ahh ok. The ARM code just allocates temps on demand including for its > zero register which I guess means multiple ones could be assigned. There > is also temp allocation logic to free them at the end of the block. > > I wonder if this is something that should be more of a feature of the > core TCG code because let's face it temp tracking is a pain. Yep, sparc and s390 have a similar bit of temp tracking. It would be nice for that to be generic. > Does the code generator suffer if temps aren't freed as soon as > possible? Is there a reason why we don't just do a general free all > temps at the end of the block? Freeing temps as quickly as possible means that we minimize the number of temps required, and the register allocator has a couple of O(N**2) places, for N == number of temps. r~
diff --git a/linux-user/main.c b/linux-user/main.c index 64838bf..6cdd0da 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -2994,51 +2994,6 @@ void cpu_loop(CPUM68KState *env) #endif /* TARGET_M68K */ #ifdef TARGET_ALPHA -static void do_store_exclusive(CPUAlphaState *env, int reg, int quad) -{ - target_ulong addr, val, tmp; - target_siginfo_t info; - int ret = 0; - - addr = env->lock_addr; - tmp = env->lock_st_addr; - env->lock_addr = -1; - env->lock_st_addr = 0; - - start_exclusive(); - mmap_lock(); - - if (addr == tmp) { - if (quad ? get_user_s64(val, addr) : get_user_s32(val, addr)) { - goto do_sigsegv; - } - - if (val == env->lock_value) { - tmp = env->ir[reg]; - if (quad ? put_user_u64(tmp, addr) : put_user_u32(tmp, addr)) { - goto do_sigsegv; - } - ret = 1; - } - } - env->ir[reg] = ret; - env->pc += 4; - - mmap_unlock(); - end_exclusive(); - return; - - do_sigsegv: - mmap_unlock(); - end_exclusive(); - - info.si_signo = TARGET_SIGSEGV; - info.si_errno = 0; - info.si_code = TARGET_SEGV_MAPERR; - info._sifields._sigfault._addr = addr; - queue_signal(env, TARGET_SIGSEGV, &info); -} - void cpu_loop(CPUAlphaState *env) { CPUState *cs = CPU(alpha_env_get_cpu(env)); @@ -3212,10 +3167,6 @@ void cpu_loop(CPUAlphaState *env) queue_signal(env, info.si_signo, &info); } break; - case EXCP_STL_C: - case EXCP_STQ_C: - do_store_exclusive(env, env->error_code, trapnr - EXCP_STL_C); - break; case EXCP_INTERRUPT: /* Just indicate that signals should be handled asap. */ break; diff --git a/target-alpha/cpu.h b/target-alpha/cpu.h index 9d9489c..a4068c8 100644 --- a/target-alpha/cpu.h +++ b/target-alpha/cpu.h @@ -230,7 +230,6 @@ struct CPUAlphaState { uint64_t pc; uint64_t unique; uint64_t lock_addr; - uint64_t lock_st_addr; uint64_t lock_value; /* The FPCR, and disassembled portions thereof. */ @@ -346,9 +345,6 @@ enum { EXCP_ARITH, EXCP_FEN, EXCP_CALL_PAL, - /* For Usermode emulation. */ - EXCP_STL_C, - EXCP_STQ_C, }; /* Alpha-specific interrupt pending bits. */ diff --git a/target-alpha/helper.c b/target-alpha/helper.c index 1ed0725..95453f5 100644 --- a/target-alpha/helper.c +++ b/target-alpha/helper.c @@ -306,12 +306,6 @@ void alpha_cpu_do_interrupt(CPUState *cs) case EXCP_CALL_PAL: name = "call_pal"; break; - case EXCP_STL_C: - name = "stl_c"; - break; - case EXCP_STQ_C: - name = "stq_c"; - break; } qemu_log("INT %6d: %s(%#x) pc=%016" PRIx64 " sp=%016" PRIx64 "\n", ++count, name, env->error_code, env->pc, env->ir[IR_SP]); diff --git a/target-alpha/machine.c b/target-alpha/machine.c index 710b783..b99a123 100644 --- a/target-alpha/machine.c +++ b/target-alpha/machine.c @@ -45,8 +45,6 @@ static VMStateField vmstate_env_fields[] = { VMSTATE_UINTTL(unique, CPUAlphaState), VMSTATE_UINTTL(lock_addr, CPUAlphaState), VMSTATE_UINTTL(lock_value, CPUAlphaState), - /* Note that lock_st_addr is not saved; it is a temporary - used during the execution of the st[lq]_c insns. */ VMSTATE_UINT8(ps, CPUAlphaState), VMSTATE_UINT8(intr_flag, CPUAlphaState), diff --git a/target-alpha/translate.c b/target-alpha/translate.c index 2941159..ed353de 100644 --- a/target-alpha/translate.c +++ b/target-alpha/translate.c @@ -99,7 +99,6 @@ static TCGv cpu_std_ir[31]; static TCGv cpu_fir[31]; static TCGv cpu_pc; static TCGv cpu_lock_addr; -static TCGv cpu_lock_st_addr; static TCGv cpu_lock_value; #ifndef CONFIG_USER_ONLY @@ -116,7 +115,6 @@ void alpha_translate_init(void) static const GlobalVar vars[] = { DEF_VAR(pc), DEF_VAR(lock_addr), - DEF_VAR(lock_st_addr), DEF_VAR(lock_value), }; @@ -198,6 +196,23 @@ static TCGv dest_sink(DisasContext *ctx) return ctx->sink; } +static void free_context_temps(DisasContext *ctx) +{ + if (!TCGV_IS_UNUSED_I64(ctx->sink)) { + tcg_gen_discard_i64(ctx->sink); + tcg_temp_free(ctx->sink); + TCGV_UNUSED_I64(ctx->sink); + } + if (!TCGV_IS_UNUSED_I64(ctx->zero)) { + tcg_temp_free(ctx->zero); + TCGV_UNUSED_I64(ctx->zero); + } + if (!TCGV_IS_UNUSED_I64(ctx->lit)) { + tcg_temp_free(ctx->lit); + TCGV_UNUSED_I64(ctx->lit); + } +} + static TCGv load_gpr(DisasContext *ctx, unsigned reg) { if (likely(reg < 31)) { @@ -395,56 +410,37 @@ static ExitStatus gen_store_conditional(DisasContext *ctx, int ra, int rb, int32_t disp16, int mem_idx, TCGMemOp op) { - TCGv addr; - - if (ra == 31) { - /* ??? Don't bother storing anything. The user can't tell - the difference, since the zero register always reads zero. */ - return NO_EXIT; - } - -#if defined(CONFIG_USER_ONLY) - addr = cpu_lock_st_addr; -#else - addr = tcg_temp_local_new(); -#endif + TCGLabel *lab_fail, *lab_done; + TCGv addr, val; + addr = tcg_temp_new_i64(); tcg_gen_addi_i64(addr, load_gpr(ctx, rb), disp16); + free_context_temps(ctx); -#if defined(CONFIG_USER_ONLY) - /* ??? This is handled via a complicated version of compare-and-swap - in the cpu_loop. Hopefully one day we'll have a real CAS opcode - in TCG so that this isn't necessary. */ - return gen_excp(ctx, (op & MO_SIZE) == MO_64 ? EXCP_STQ_C : EXCP_STL_C, ra); -#else - /* ??? In system mode we are never multi-threaded, so CAS can be - implemented via a non-atomic load-compare-store sequence. */ - { - TCGLabel *lab_fail, *lab_done; - TCGv val; + lab_fail = gen_new_label(); + lab_done = gen_new_label(); + tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_lock_addr, lab_fail); + tcg_temp_free_i64(addr); - lab_fail = gen_new_label(); - lab_done = gen_new_label(); - tcg_gen_brcond_i64(TCG_COND_NE, addr, cpu_lock_addr, lab_fail); + val = tcg_temp_new_i64(); + tcg_gen_atomic_cmpxchg_i64(val, cpu_lock_addr, cpu_lock_value, + load_gpr(ctx, ra), mem_idx, op); + free_context_temps(ctx); - val = tcg_temp_new(); - tcg_gen_qemu_ld_i64(val, addr, mem_idx, op); - tcg_gen_brcond_i64(TCG_COND_NE, val, cpu_lock_value, lab_fail); - - tcg_gen_qemu_st_i64(ctx->ir[ra], addr, mem_idx, op); - tcg_gen_movi_i64(ctx->ir[ra], 1); - tcg_gen_br(lab_done); + if (ra != 31) { + tcg_gen_setcond_i64(TCG_COND_EQ, ctx->ir[ra], val, cpu_lock_value); + } + tcg_temp_free_i64(val); + tcg_gen_br(lab_done); - gen_set_label(lab_fail); + gen_set_label(lab_fail); + if (ra != 31) { tcg_gen_movi_i64(ctx->ir[ra], 0); - - gen_set_label(lab_done); - tcg_gen_movi_i64(cpu_lock_addr, -1); - - tcg_temp_free(addr); - return NO_EXIT; } -#endif + + gen_set_label(lab_done); + tcg_gen_movi_i64(cpu_lock_addr, -1); + return NO_EXIT; } static bool in_superpage(DisasContext *ctx, int64_t addr) @@ -2914,6 +2910,10 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb) /* Similarly for flush-to-zero. */ ctx.tb_ftz = -1; + TCGV_UNUSED_I64(ctx.zero); + TCGV_UNUSED_I64(ctx.sink); + TCGV_UNUSED_I64(ctx.lit); + num_insns = 0; max_insns = tb->cflags & CF_COUNT_MASK; if (max_insns == 0) { @@ -2948,23 +2948,9 @@ void gen_intermediate_code(CPUAlphaState *env, struct TranslationBlock *tb) } insn = cpu_ldl_code(env, ctx.pc); - TCGV_UNUSED_I64(ctx.zero); - TCGV_UNUSED_I64(ctx.sink); - TCGV_UNUSED_I64(ctx.lit); - ctx.pc += 4; ret = translate_one(ctxp, insn); - - if (!TCGV_IS_UNUSED_I64(ctx.sink)) { - tcg_gen_discard_i64(ctx.sink); - tcg_temp_free(ctx.sink); - } - if (!TCGV_IS_UNUSED_I64(ctx.zero)) { - tcg_temp_free(ctx.zero); - } - if (!TCGV_IS_UNUSED_I64(ctx.lit)) { - tcg_temp_free(ctx.lit); - } + free_context_temps(ctxp); /* If we reach a page boundary, are single stepping, or exhaust instruction count, stop generation. */
Emulating LL/SC with cmpxchg is not correct, since it can suffer from the ABA problem. However, portable parallel code is writting assuming only cmpxchg which means that in practice this is a viable alternative. Signed-off-by: Richard Henderson <rth@twiddle.net> --- linux-user/main.c | 49 ---------------------- target-alpha/cpu.h | 4 -- target-alpha/helper.c | 6 --- target-alpha/machine.c | 2 - target-alpha/translate.c | 104 ++++++++++++++++++++--------------------------- 5 files changed, 45 insertions(+), 120 deletions(-)