Message ID | 20220221092908.14165-1-hongyu.wang@intel.com |
---|---|
State | New |
Headers | show |
Series | i386: Relax cmpxchg instruction under -mrelax-cmpxchg-loop [PR 103069] | expand |
On Mon, Feb 21, 2022 at 10:29 AM Hongyu Wang <hongyu.wang@intel.com> wrote: > > Hi, > > For cmpxchg, it is commonly used in spin loop, and several user code > such as pthread directly takes cmpxchg as loop condition, which cause > huge cache bouncing. > > This patch extends previous implementation to relax all cmpxchg > instruction under -mrelax-cmpxchg-loop with an extra atomic load, > compare and emulate the failed cmpxchg behavior. > > For original spin loop which looks like > > loop: mov %eax,%r8d > or $1,%r8d > lock cmpxchg %r8d,(%rdi) > jne loop > > It will now truns to > > loop: mov %eax,%r8d > or $1,%r8d > mov (%r8),%rsi <--- load lock first > cmp %rsi,%rax <--- compare with expected input > jne .L2 <--- lock ne expected > lock cmpxchg %r8d,(%rdi) > jne loop > L2: mov %rsi,%rax <--- perform the behavior of failed cmpxchg > jne loop > > under -mrelax-cmpxchg-loop. > > Bootstrapped and regtested on x86_64-pc-linux-gnu{-m32,} > > OK for master? > > gcc/ChangeLog: > > PR target/103069 > * config/i386/i386-expand.cc (ix86_expand_atomic_fetch_op_loop): > Split atomic fetch and loop part. > (ix86_expand_cmpxchg_loop): New expander for cmpxchg loop. > * config/i386/i386-protos.h (ix86_expand_cmpxchg_loop): New > prototype. > * config/i386/sync.md (atomic_compare_and_swap<mode>): Call new > expander under TARGET_RELAX_CMPXCHG_LOOP. > (atomic_compare_and_swap<mode>): Likewise for doubleword modes. > > gcc/testsuite/ChangeLog: > > PR target/103069 > * gcc.target/i386/pr103069-2.c: Adjust result check. > * gcc.target/i386/pr103069-3.c: New test. > * gcc.target/i386/pr103069-4.c: Likewise. OK. Thanks, Uros. > --- > gcc/config/i386/i386-expand.cc | 153 +++++++++++++++------ > gcc/config/i386/i386-protos.h | 2 + > gcc/config/i386/sync.md | 65 +++++---- > gcc/testsuite/gcc.target/i386/pr103069-2.c | 4 +- > gcc/testsuite/gcc.target/i386/pr103069-3.c | 24 ++++ > gcc/testsuite/gcc.target/i386/pr103069-4.c | 43 ++++++ > 6 files changed, 226 insertions(+), 65 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/i386/pr103069-3.c > create mode 100644 gcc/testsuite/gcc.target/i386/pr103069-4.c > > diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc > index ce9607e36de..6cf1a0b9cb6 100644 > --- a/gcc/config/i386/i386-expand.cc > +++ b/gcc/config/i386/i386-expand.cc > @@ -23203,16 +23203,14 @@ void ix86_expand_atomic_fetch_op_loop (rtx target, rtx mem, rtx val, > enum rtx_code code, bool after, > bool doubleword) > { > - rtx old_reg, new_reg, old_mem, success, oldval, new_mem; > - rtx_code_label *loop_label, *pause_label, *done_label; > + rtx old_reg, new_reg, old_mem, success; > machine_mode mode = GET_MODE (target); > + rtx_code_label *loop_label = NULL; > > old_reg = gen_reg_rtx (mode); > new_reg = old_reg; > - loop_label = gen_label_rtx (); > - pause_label = gen_label_rtx (); > - done_label = gen_label_rtx (); > old_mem = copy_to_reg (mem); > + loop_label = gen_label_rtx (); > emit_label (loop_label); > emit_move_insn (old_reg, old_mem); > > @@ -23234,50 +23232,125 @@ void ix86_expand_atomic_fetch_op_loop (rtx target, rtx mem, rtx val, > if (after) > emit_move_insn (target, new_reg); > > - /* Load memory again inside loop. */ > - new_mem = copy_to_reg (mem); > - /* Compare mem value with expected value. */ > + success = NULL_RTX; > + > + ix86_expand_cmpxchg_loop (&success, old_mem, mem, old_reg, new_reg, > + gen_int_mode (MEMMODEL_SYNC_SEQ_CST, > + SImode), > + doubleword, loop_label); > +} > + > +/* Relax cmpxchg instruction, param loop_label indicates whether > + the instruction should be relaxed with a pause loop. If not, > + it will be relaxed to an atomic load + compare, and skip > + cmpxchg instruction if mem != exp_input. */ > + > +void ix86_expand_cmpxchg_loop (rtx *ptarget_bool, rtx target_val, > + rtx mem, rtx exp_input, rtx new_input, > + rtx mem_model, bool doubleword, > + rtx_code_label *loop_label) > +{ > + rtx_code_label *cmp_label = NULL; > + rtx_code_label *done_label = NULL; > + rtx target_bool = NULL_RTX, new_mem = NULL_RTX; > + rtx (*gen) (rtx, rtx, rtx, rtx, rtx) = NULL; > + rtx (*gendw) (rtx, rtx, rtx, rtx, rtx, rtx) = NULL; > + machine_mode mode = GET_MODE (target_val), hmode = mode; > + > + if (*ptarget_bool == NULL) > + target_bool = gen_reg_rtx (QImode); > + else > + target_bool = *ptarget_bool; > + > + cmp_label = gen_label_rtx (); > + done_label = gen_label_rtx (); > + > + new_mem = gen_reg_rtx (mode); > + /* Load memory first. */ > + expand_atomic_load (new_mem, mem, MEMMODEL_SEQ_CST); > + > + switch (mode) > + { > + case TImode: > + gendw = gen_atomic_compare_and_swapti_doubleword; > + hmode = DImode; > + break; > + case DImode: > + if (doubleword) > + { > + gendw = gen_atomic_compare_and_swapdi_doubleword; > + hmode = SImode; > + } > + else > + gen = gen_atomic_compare_and_swapdi_1; > + break; > + case SImode: > + gen = gen_atomic_compare_and_swapsi_1; break; > + case HImode: > + gen = gen_atomic_compare_and_swaphi_1; break; > + case QImode: > + gen = gen_atomic_compare_and_swapqi_1; break; > + default: > + gcc_unreachable (); > + } > > + /* Compare mem value with expected value. */ > if (doubleword) > { > - machine_mode half_mode = (mode == DImode)? SImode : DImode; > - rtx low_new_mem = gen_lowpart (half_mode, new_mem); > - rtx low_old_mem = gen_lowpart (half_mode, old_mem); > - rtx high_new_mem = gen_highpart (half_mode, new_mem); > - rtx high_old_mem = gen_highpart (half_mode, old_mem); > - emit_cmp_and_jump_insns (low_new_mem, low_old_mem, NE, NULL_RTX, > - half_mode, 1, pause_label, > + rtx low_new_mem = gen_lowpart (hmode, new_mem); > + rtx low_exp_input = gen_lowpart (hmode, exp_input); > + rtx high_new_mem = gen_highpart (hmode, new_mem); > + rtx high_exp_input = gen_highpart (hmode, exp_input); > + emit_cmp_and_jump_insns (low_new_mem, low_exp_input, NE, NULL_RTX, > + hmode, 1, cmp_label, > profile_probability::guessed_never ()); > - emit_cmp_and_jump_insns (high_new_mem, high_old_mem, NE, NULL_RTX, > - half_mode, 1, pause_label, > + emit_cmp_and_jump_insns (high_new_mem, high_exp_input, NE, NULL_RTX, > + hmode, 1, cmp_label, > profile_probability::guessed_never ()); > } > else > - emit_cmp_and_jump_insns (new_mem, old_mem, NE, NULL_RTX, > - GET_MODE (old_mem), 1, pause_label, > + emit_cmp_and_jump_insns (new_mem, exp_input, NE, NULL_RTX, > + GET_MODE (exp_input), 1, cmp_label, > profile_probability::guessed_never ()); > > - success = NULL_RTX; > - oldval = old_mem; > - expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg, > - new_reg, false, MEMMODEL_SYNC_SEQ_CST, > - MEMMODEL_RELAXED); > - if (oldval != old_mem) > - emit_move_insn (old_mem, oldval); > - > - emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx, > - GET_MODE (success), 1, loop_label, > - profile_probability::guessed_never ()); > - > - emit_jump_insn (gen_jump (done_label)); > - emit_barrier (); > - > - /* If mem is not expected, pause and loop back. */ > - emit_label (pause_label); > - emit_insn (gen_pause ()); > - emit_jump_insn (gen_jump (loop_label)); > - emit_barrier (); > - emit_label (done_label); > + /* Directly emits cmpxchg here. */ > + if (doubleword) > + emit_insn (gendw (target_val, mem, exp_input, > + gen_lowpart (hmode, new_input), > + gen_highpart (hmode, new_input), > + mem_model)); > + else > + emit_insn (gen (target_val, mem, exp_input, new_input, mem_model)); > + > + if (!loop_label) > + { > + emit_jump_insn (gen_jump (done_label)); > + emit_barrier (); > + emit_label (cmp_label); > + emit_move_insn (target_val, new_mem); > + emit_label (done_label); > + ix86_expand_setcc (target_bool, EQ, gen_rtx_REG (CCZmode, FLAGS_REG), > + const0_rtx); > + } > + else > + { > + ix86_expand_setcc (target_bool, EQ, gen_rtx_REG (CCZmode, FLAGS_REG), > + const0_rtx); > + emit_cmp_and_jump_insns (target_bool, const0_rtx, EQ, const0_rtx, > + GET_MODE (target_bool), 1, loop_label, > + profile_probability::guessed_never ()); > + emit_jump_insn (gen_jump (done_label)); > + emit_barrier (); > + > + /* If mem is not expected, pause and loop back. */ > + emit_label (cmp_label); > + emit_insn (gen_pause ()); > + emit_jump_insn (gen_jump (loop_label)); > + emit_barrier (); > + emit_label (done_label); > + } > + > + *ptarget_bool = target_bool; > } > > #include "gt-i386-expand.h" > diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h > index b7e9aa75d25..d5e11259a5a 100644 > --- a/gcc/config/i386/i386-protos.h > +++ b/gcc/config/i386/i386-protos.h > @@ -221,6 +221,8 @@ extern void ix86_split_mmx_punpck (rtx[], bool); > extern void ix86_expand_avx_vzeroupper (void); > extern void ix86_expand_atomic_fetch_op_loop (rtx, rtx, rtx, enum rtx_code, > bool, bool); > +extern void ix86_expand_cmpxchg_loop (rtx *, rtx, rtx, rtx, rtx, rtx, > + bool, rtx_code_label *); > > #ifdef TREE_CODE > extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int); > diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md > index 36417c54c11..820e9ca911a 100644 > --- a/gcc/config/i386/sync.md > +++ b/gcc/config/i386/sync.md > @@ -373,11 +373,20 @@ (define_expand "atomic_compare_and_swap<mode>" > (match_operand:SI 7 "const_int_operand")] ;; failure model > "TARGET_CMPXCHG" > { > - emit_insn > - (gen_atomic_compare_and_swap<mode>_1 > - (operands[1], operands[2], operands[3], operands[4], operands[6])); > - ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG), > - const0_rtx); > + if (TARGET_RELAX_CMPXCHG_LOOP) > + { > + ix86_expand_cmpxchg_loop (&operands[0], operands[1], operands[2], > + operands[3], operands[4], operands[6], > + false, NULL); > + } > + else > + { > + emit_insn > + (gen_atomic_compare_and_swap<mode>_1 > + (operands[1], operands[2], operands[3], operands[4], operands[6])); > + ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG), > + const0_rtx); > + } > DONE; > }) > > @@ -397,25 +406,35 @@ (define_expand "atomic_compare_and_swap<mode>" > (match_operand:SI 7 "const_int_operand")] ;; failure model > "TARGET_CMPXCHG" > { > - if (<MODE>mode == DImode && TARGET_64BIT) > - { > - emit_insn > - (gen_atomic_compare_and_swapdi_1 > - (operands[1], operands[2], operands[3], operands[4], operands[6])); > - } > + int doubleword = !(<MODE>mode == DImode && TARGET_64BIT); > + if (TARGET_RELAX_CMPXCHG_LOOP) > + { > + ix86_expand_cmpxchg_loop (&operands[0], operands[1], operands[2], > + operands[3], operands[4], operands[6], > + doubleword, NULL); > + } > else > - { > - machine_mode hmode = <CASHMODE>mode; > - > - emit_insn > - (gen_atomic_compare_and_swap<mode>_doubleword > - (operands[1], operands[2], operands[3], > - gen_lowpart (hmode, operands[4]), gen_highpart (hmode, operands[4]), > - operands[6])); > - } > - > - ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG), > - const0_rtx); > + { > + if (!doubleword) > + { > + emit_insn > + (gen_atomic_compare_and_swapdi_1 > + (operands[1], operands[2], operands[3], operands[4], operands[6])); > + } > + else > + { > + machine_mode hmode = <CASHMODE>mode; > + > + emit_insn > + (gen_atomic_compare_and_swap<mode>_doubleword > + (operands[1], operands[2], operands[3], > + gen_lowpart (hmode, operands[4]), gen_highpart (hmode, operands[4]), > + operands[6])); > + } > + > + ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG), > + const0_rtx); > + } > DONE; > }) > > diff --git a/gcc/testsuite/gcc.target/i386/pr103069-2.c b/gcc/testsuite/gcc.target/i386/pr103069-2.c > index b3f2235fd55..2ceb54e013a 100644 > --- a/gcc/testsuite/gcc.target/i386/pr103069-2.c > +++ b/gcc/testsuite/gcc.target/i386/pr103069-2.c > @@ -40,12 +40,12 @@ FUNC_ATOMIC_RELAX (char, xor) > TYPE c = 11, d = 101; \ > res = relax_##TYPE##_##OP##_fetch (&a, b); \ > exp = f_##TYPE##_##OP##_fetch (&c, d); \ > - if (res != exp) \ > + if (res != exp || a != c) \ > abort (); \ > a = c = 21, b = d = 92; \ > res = relax_##TYPE##_fetch_##OP (&a, b); \ > exp = f_##TYPE##_fetch_##OP (&c, d); \ > - if (res != exp) \ > + if (res != exp || a != c) \ > abort (); \ > } > > diff --git a/gcc/testsuite/gcc.target/i386/pr103069-3.c b/gcc/testsuite/gcc.target/i386/pr103069-3.c > new file mode 100644 > index 00000000000..fee708905ec > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr103069-3.c > @@ -0,0 +1,24 @@ > +/* PR target/103068 */ > +/* { dg-do compile } */ > +/* { dg-additional-options "-O2 -march=x86-64 -mtune=generic -mrelax-cmpxchg-loop" } */ > + > +#include <stdint.h> > + > +#define FUNC_CMPXCHG(TYPE) \ > +__attribute__ ((noinline, noclone)) \ > +TYPE f_##TYPE##_cmpxchg (TYPE *lock, TYPE newval, TYPE oldval) \ > +{ \ > + do \ > + { \ > + newval = oldval | 1; \ > + } while (! __atomic_compare_exchange_n (lock, &oldval, newval, \ > + 0, __ATOMIC_RELEASE, \ > + __ATOMIC_RELAXED)); \ > + return *lock; \ > +} > + > + > +FUNC_CMPXCHG (int64_t) > +FUNC_CMPXCHG (int) > +FUNC_CMPXCHG (short) > +FUNC_CMPXCHG (char) > diff --git a/gcc/testsuite/gcc.target/i386/pr103069-4.c b/gcc/testsuite/gcc.target/i386/pr103069-4.c > new file mode 100644 > index 00000000000..5abcbca2a98 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/i386/pr103069-4.c > @@ -0,0 +1,43 @@ > +/* PR target/103069 */ > +/* { dg-do run } */ > +/* { dg-additional-options "-O2 -march=x86-64 -mtune=generic" } */ > + > +#include <stdlib.h> > +#include "pr103069-3.c" > + > +#define FUNC_CMPXCHG_RELAX(TYPE) \ > +__attribute__ ((noinline, noclone, target ("relax-cmpxchg-loop"))) \ > +TYPE relax_##TYPE##_cmpxchg (TYPE *lock, TYPE newval, TYPE oldval) \ > +{ \ > + do \ > + { \ > + newval = oldval | 1; \ > + } while (! __atomic_compare_exchange_n (lock, &oldval, newval, \ > + 0, __ATOMIC_RELEASE, \ > + __ATOMIC_RELAXED)); \ > + return *lock; \ > +} > + > +FUNC_CMPXCHG_RELAX (int64_t) > +FUNC_CMPXCHG_RELAX (int) > +FUNC_CMPXCHG_RELAX (short) > +FUNC_CMPXCHG_RELAX (char) > + > +#define TEST_CMPXCHG_LOOP(TYPE) \ > +{ \ > + TYPE a = 11, b = 20, c = 11, res, exp; \ > + TYPE d = 11, e = 20, f = 11; \ > + res = relax_##TYPE##_cmpxchg (&a, b, c); \ > + exp = f_##TYPE##_cmpxchg (&d, e, f); \ > + if (res != exp || a != d) \ > + abort (); \ > +} > + > +int main (void) > +{ > + TEST_CMPXCHG_LOOP (int64_t) > + TEST_CMPXCHG_LOOP (int) > + TEST_CMPXCHG_LOOP (short) > + TEST_CMPXCHG_LOOP (char) > + return 0; > +} > -- > 2.18.1 >
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc index ce9607e36de..6cf1a0b9cb6 100644 --- a/gcc/config/i386/i386-expand.cc +++ b/gcc/config/i386/i386-expand.cc @@ -23203,16 +23203,14 @@ void ix86_expand_atomic_fetch_op_loop (rtx target, rtx mem, rtx val, enum rtx_code code, bool after, bool doubleword) { - rtx old_reg, new_reg, old_mem, success, oldval, new_mem; - rtx_code_label *loop_label, *pause_label, *done_label; + rtx old_reg, new_reg, old_mem, success; machine_mode mode = GET_MODE (target); + rtx_code_label *loop_label = NULL; old_reg = gen_reg_rtx (mode); new_reg = old_reg; - loop_label = gen_label_rtx (); - pause_label = gen_label_rtx (); - done_label = gen_label_rtx (); old_mem = copy_to_reg (mem); + loop_label = gen_label_rtx (); emit_label (loop_label); emit_move_insn (old_reg, old_mem); @@ -23234,50 +23232,125 @@ void ix86_expand_atomic_fetch_op_loop (rtx target, rtx mem, rtx val, if (after) emit_move_insn (target, new_reg); - /* Load memory again inside loop. */ - new_mem = copy_to_reg (mem); - /* Compare mem value with expected value. */ + success = NULL_RTX; + + ix86_expand_cmpxchg_loop (&success, old_mem, mem, old_reg, new_reg, + gen_int_mode (MEMMODEL_SYNC_SEQ_CST, + SImode), + doubleword, loop_label); +} + +/* Relax cmpxchg instruction, param loop_label indicates whether + the instruction should be relaxed with a pause loop. If not, + it will be relaxed to an atomic load + compare, and skip + cmpxchg instruction if mem != exp_input. */ + +void ix86_expand_cmpxchg_loop (rtx *ptarget_bool, rtx target_val, + rtx mem, rtx exp_input, rtx new_input, + rtx mem_model, bool doubleword, + rtx_code_label *loop_label) +{ + rtx_code_label *cmp_label = NULL; + rtx_code_label *done_label = NULL; + rtx target_bool = NULL_RTX, new_mem = NULL_RTX; + rtx (*gen) (rtx, rtx, rtx, rtx, rtx) = NULL; + rtx (*gendw) (rtx, rtx, rtx, rtx, rtx, rtx) = NULL; + machine_mode mode = GET_MODE (target_val), hmode = mode; + + if (*ptarget_bool == NULL) + target_bool = gen_reg_rtx (QImode); + else + target_bool = *ptarget_bool; + + cmp_label = gen_label_rtx (); + done_label = gen_label_rtx (); + + new_mem = gen_reg_rtx (mode); + /* Load memory first. */ + expand_atomic_load (new_mem, mem, MEMMODEL_SEQ_CST); + + switch (mode) + { + case TImode: + gendw = gen_atomic_compare_and_swapti_doubleword; + hmode = DImode; + break; + case DImode: + if (doubleword) + { + gendw = gen_atomic_compare_and_swapdi_doubleword; + hmode = SImode; + } + else + gen = gen_atomic_compare_and_swapdi_1; + break; + case SImode: + gen = gen_atomic_compare_and_swapsi_1; break; + case HImode: + gen = gen_atomic_compare_and_swaphi_1; break; + case QImode: + gen = gen_atomic_compare_and_swapqi_1; break; + default: + gcc_unreachable (); + } + /* Compare mem value with expected value. */ if (doubleword) { - machine_mode half_mode = (mode == DImode)? SImode : DImode; - rtx low_new_mem = gen_lowpart (half_mode, new_mem); - rtx low_old_mem = gen_lowpart (half_mode, old_mem); - rtx high_new_mem = gen_highpart (half_mode, new_mem); - rtx high_old_mem = gen_highpart (half_mode, old_mem); - emit_cmp_and_jump_insns (low_new_mem, low_old_mem, NE, NULL_RTX, - half_mode, 1, pause_label, + rtx low_new_mem = gen_lowpart (hmode, new_mem); + rtx low_exp_input = gen_lowpart (hmode, exp_input); + rtx high_new_mem = gen_highpart (hmode, new_mem); + rtx high_exp_input = gen_highpart (hmode, exp_input); + emit_cmp_and_jump_insns (low_new_mem, low_exp_input, NE, NULL_RTX, + hmode, 1, cmp_label, profile_probability::guessed_never ()); - emit_cmp_and_jump_insns (high_new_mem, high_old_mem, NE, NULL_RTX, - half_mode, 1, pause_label, + emit_cmp_and_jump_insns (high_new_mem, high_exp_input, NE, NULL_RTX, + hmode, 1, cmp_label, profile_probability::guessed_never ()); } else - emit_cmp_and_jump_insns (new_mem, old_mem, NE, NULL_RTX, - GET_MODE (old_mem), 1, pause_label, + emit_cmp_and_jump_insns (new_mem, exp_input, NE, NULL_RTX, + GET_MODE (exp_input), 1, cmp_label, profile_probability::guessed_never ()); - success = NULL_RTX; - oldval = old_mem; - expand_atomic_compare_and_swap (&success, &oldval, mem, old_reg, - new_reg, false, MEMMODEL_SYNC_SEQ_CST, - MEMMODEL_RELAXED); - if (oldval != old_mem) - emit_move_insn (old_mem, oldval); - - emit_cmp_and_jump_insns (success, const0_rtx, EQ, const0_rtx, - GET_MODE (success), 1, loop_label, - profile_probability::guessed_never ()); - - emit_jump_insn (gen_jump (done_label)); - emit_barrier (); - - /* If mem is not expected, pause and loop back. */ - emit_label (pause_label); - emit_insn (gen_pause ()); - emit_jump_insn (gen_jump (loop_label)); - emit_barrier (); - emit_label (done_label); + /* Directly emits cmpxchg here. */ + if (doubleword) + emit_insn (gendw (target_val, mem, exp_input, + gen_lowpart (hmode, new_input), + gen_highpart (hmode, new_input), + mem_model)); + else + emit_insn (gen (target_val, mem, exp_input, new_input, mem_model)); + + if (!loop_label) + { + emit_jump_insn (gen_jump (done_label)); + emit_barrier (); + emit_label (cmp_label); + emit_move_insn (target_val, new_mem); + emit_label (done_label); + ix86_expand_setcc (target_bool, EQ, gen_rtx_REG (CCZmode, FLAGS_REG), + const0_rtx); + } + else + { + ix86_expand_setcc (target_bool, EQ, gen_rtx_REG (CCZmode, FLAGS_REG), + const0_rtx); + emit_cmp_and_jump_insns (target_bool, const0_rtx, EQ, const0_rtx, + GET_MODE (target_bool), 1, loop_label, + profile_probability::guessed_never ()); + emit_jump_insn (gen_jump (done_label)); + emit_barrier (); + + /* If mem is not expected, pause and loop back. */ + emit_label (cmp_label); + emit_insn (gen_pause ()); + emit_jump_insn (gen_jump (loop_label)); + emit_barrier (); + emit_label (done_label); + } + + *ptarget_bool = target_bool; } #include "gt-i386-expand.h" diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h index b7e9aa75d25..d5e11259a5a 100644 --- a/gcc/config/i386/i386-protos.h +++ b/gcc/config/i386/i386-protos.h @@ -221,6 +221,8 @@ extern void ix86_split_mmx_punpck (rtx[], bool); extern void ix86_expand_avx_vzeroupper (void); extern void ix86_expand_atomic_fetch_op_loop (rtx, rtx, rtx, enum rtx_code, bool, bool); +extern void ix86_expand_cmpxchg_loop (rtx *, rtx, rtx, rtx, rtx, rtx, + bool, rtx_code_label *); #ifdef TREE_CODE extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int); diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md index 36417c54c11..820e9ca911a 100644 --- a/gcc/config/i386/sync.md +++ b/gcc/config/i386/sync.md @@ -373,11 +373,20 @@ (define_expand "atomic_compare_and_swap<mode>" (match_operand:SI 7 "const_int_operand")] ;; failure model "TARGET_CMPXCHG" { - emit_insn - (gen_atomic_compare_and_swap<mode>_1 - (operands[1], operands[2], operands[3], operands[4], operands[6])); - ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG), - const0_rtx); + if (TARGET_RELAX_CMPXCHG_LOOP) + { + ix86_expand_cmpxchg_loop (&operands[0], operands[1], operands[2], + operands[3], operands[4], operands[6], + false, NULL); + } + else + { + emit_insn + (gen_atomic_compare_and_swap<mode>_1 + (operands[1], operands[2], operands[3], operands[4], operands[6])); + ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG), + const0_rtx); + } DONE; }) @@ -397,25 +406,35 @@ (define_expand "atomic_compare_and_swap<mode>" (match_operand:SI 7 "const_int_operand")] ;; failure model "TARGET_CMPXCHG" { - if (<MODE>mode == DImode && TARGET_64BIT) - { - emit_insn - (gen_atomic_compare_and_swapdi_1 - (operands[1], operands[2], operands[3], operands[4], operands[6])); - } + int doubleword = !(<MODE>mode == DImode && TARGET_64BIT); + if (TARGET_RELAX_CMPXCHG_LOOP) + { + ix86_expand_cmpxchg_loop (&operands[0], operands[1], operands[2], + operands[3], operands[4], operands[6], + doubleword, NULL); + } else - { - machine_mode hmode = <CASHMODE>mode; - - emit_insn - (gen_atomic_compare_and_swap<mode>_doubleword - (operands[1], operands[2], operands[3], - gen_lowpart (hmode, operands[4]), gen_highpart (hmode, operands[4]), - operands[6])); - } - - ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG), - const0_rtx); + { + if (!doubleword) + { + emit_insn + (gen_atomic_compare_and_swapdi_1 + (operands[1], operands[2], operands[3], operands[4], operands[6])); + } + else + { + machine_mode hmode = <CASHMODE>mode; + + emit_insn + (gen_atomic_compare_and_swap<mode>_doubleword + (operands[1], operands[2], operands[3], + gen_lowpart (hmode, operands[4]), gen_highpart (hmode, operands[4]), + operands[6])); + } + + ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG), + const0_rtx); + } DONE; }) diff --git a/gcc/testsuite/gcc.target/i386/pr103069-2.c b/gcc/testsuite/gcc.target/i386/pr103069-2.c index b3f2235fd55..2ceb54e013a 100644 --- a/gcc/testsuite/gcc.target/i386/pr103069-2.c +++ b/gcc/testsuite/gcc.target/i386/pr103069-2.c @@ -40,12 +40,12 @@ FUNC_ATOMIC_RELAX (char, xor) TYPE c = 11, d = 101; \ res = relax_##TYPE##_##OP##_fetch (&a, b); \ exp = f_##TYPE##_##OP##_fetch (&c, d); \ - if (res != exp) \ + if (res != exp || a != c) \ abort (); \ a = c = 21, b = d = 92; \ res = relax_##TYPE##_fetch_##OP (&a, b); \ exp = f_##TYPE##_fetch_##OP (&c, d); \ - if (res != exp) \ + if (res != exp || a != c) \ abort (); \ } diff --git a/gcc/testsuite/gcc.target/i386/pr103069-3.c b/gcc/testsuite/gcc.target/i386/pr103069-3.c new file mode 100644 index 00000000000..fee708905ec --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr103069-3.c @@ -0,0 +1,24 @@ +/* PR target/103068 */ +/* { dg-do compile } */ +/* { dg-additional-options "-O2 -march=x86-64 -mtune=generic -mrelax-cmpxchg-loop" } */ + +#include <stdint.h> + +#define FUNC_CMPXCHG(TYPE) \ +__attribute__ ((noinline, noclone)) \ +TYPE f_##TYPE##_cmpxchg (TYPE *lock, TYPE newval, TYPE oldval) \ +{ \ + do \ + { \ + newval = oldval | 1; \ + } while (! __atomic_compare_exchange_n (lock, &oldval, newval, \ + 0, __ATOMIC_RELEASE, \ + __ATOMIC_RELAXED)); \ + return *lock; \ +} + + +FUNC_CMPXCHG (int64_t) +FUNC_CMPXCHG (int) +FUNC_CMPXCHG (short) +FUNC_CMPXCHG (char) diff --git a/gcc/testsuite/gcc.target/i386/pr103069-4.c b/gcc/testsuite/gcc.target/i386/pr103069-4.c new file mode 100644 index 00000000000..5abcbca2a98 --- /dev/null +++ b/gcc/testsuite/gcc.target/i386/pr103069-4.c @@ -0,0 +1,43 @@ +/* PR target/103069 */ +/* { dg-do run } */ +/* { dg-additional-options "-O2 -march=x86-64 -mtune=generic" } */ + +#include <stdlib.h> +#include "pr103069-3.c" + +#define FUNC_CMPXCHG_RELAX(TYPE) \ +__attribute__ ((noinline, noclone, target ("relax-cmpxchg-loop"))) \ +TYPE relax_##TYPE##_cmpxchg (TYPE *lock, TYPE newval, TYPE oldval) \ +{ \ + do \ + { \ + newval = oldval | 1; \ + } while (! __atomic_compare_exchange_n (lock, &oldval, newval, \ + 0, __ATOMIC_RELEASE, \ + __ATOMIC_RELAXED)); \ + return *lock; \ +} + +FUNC_CMPXCHG_RELAX (int64_t) +FUNC_CMPXCHG_RELAX (int) +FUNC_CMPXCHG_RELAX (short) +FUNC_CMPXCHG_RELAX (char) + +#define TEST_CMPXCHG_LOOP(TYPE) \ +{ \ + TYPE a = 11, b = 20, c = 11, res, exp; \ + TYPE d = 11, e = 20, f = 11; \ + res = relax_##TYPE##_cmpxchg (&a, b, c); \ + exp = f_##TYPE##_cmpxchg (&d, e, f); \ + if (res != exp || a != d) \ + abort (); \ +} + +int main (void) +{ + TEST_CMPXCHG_LOOP (int64_t) + TEST_CMPXCHG_LOOP (int) + TEST_CMPXCHG_LOOP (short) + TEST_CMPXCHG_LOOP (char) + return 0; +}