diff mbox

[i386] : Fix PR 58945, Improve atomic_compare_and_swap*_doubleword pattern

Message ID CAFULd4YMsN16LjfXev84tsNZHyAKwqdeASdmWmwek94bVz3SUw@mail.gmail.com
State New
Headers show

Commit Message

Uros Bizjak March 31, 2015, 5:12 p.m. UTC
Hello!

As shown in the PR, the attached patch substantially improves the generated
code when a cmpxchg{8,16}b insn is involved. The following testcase:

--cut here--
/* 128-bit global that is the target of the 16-byte atomic store below.  */
__int128_t i;

/* Testcase for PR target/58945: atomically store -1 into `i`, then abort
   if `i` does not read back as -1.  */
int main()
{
  /* The last argument (0) is the memory-order operand; presumably
     __ATOMIC_RELAXED -- confirm against the GCC __atomic builtins docs.  */
  __atomic_store_16(&i, -1, 0);
  if (i != -1)
    __builtin_abort();
  return 0;
}
--cut here--

compiles with -O2 -mcx16 to:

        movq    i(%rip), %rax
        movq    $-1, %rcx
        movq    i+8(%rip), %rdx
.L2:
        movq    %rcx, %rbx
        lock cmpxchg16b        i(%rip)
        jne     .L2

where without the patch, the compiler generated:

        movq    i(%rip), %rsi
        movq    $-1, %rcx
        movq    i+8(%rip), %rdi
.L2:
        movq    %rsi, %rax
        movq    %rdi, %rdx
        movq    %rcx, %rbx
        lock cmpxchg16b i(%rip)
        movq    %rdx, %rdi
        movq    %rax, %rsi
        jne     .L2

2015-03-31  Uros Bizjak  <ubizjak@gmail.com>

    PR target/58945
    * config/i386/sync.md (atomic_compare_and_swap<dwi>_doubleword):
    Do not split operands 0 and 2 to halfmode.
    (atomic_compare_and_swap<mode>): Update for
    atomic_compare_and_swap<dwi>_doubleword changes.

Patch was bootstrapped and regression tested on x86_64-linux-gnu
{,-m32} and was committed to mainline.

Uros.
diff mbox

Patch

Index: config/i386/sync.md
===================================================================
--- config/i386/sync.md	(revision 221786)
+++ config/i386/sync.md	(working copy)
@@ -351,21 +351,12 @@ 
   else
     {
       machine_mode hmode = <CASHMODE>mode;
-      rtx lo_o, lo_e, lo_n, hi_o, hi_e, hi_n;
 
-      lo_o = operands[1];
-      lo_e = operands[3];
-      lo_n = operands[4];
-      hi_o = gen_highpart (hmode, lo_o);
-      hi_e = gen_highpart (hmode, lo_e);
-      hi_n = gen_highpart (hmode, lo_n);
-      lo_o = gen_lowpart (hmode, lo_o);
-      lo_e = gen_lowpart (hmode, lo_e);
-      lo_n = gen_lowpart (hmode, lo_n);
-
       emit_insn
        (gen_atomic_compare_and_swap<mode>_doubleword
-        (lo_o, hi_o, operands[2], lo_e, hi_e, lo_n, hi_n, operands[6]));
+        (operands[1], operands[2], operands[3],
+	 gen_lowpart (hmode, operands[4]), gen_highpart (hmode, operands[4]),
+	 operands[6]));
     }
 
   ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
@@ -389,31 +380,26 @@ 
   "lock{%;} %K4cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}")
 
 ;; For double-word compare and swap, we are obliged to play tricks with
-;; the input newval (op5:op6) because the Intel register numbering does
+;; the input newval (op3:op4) because the Intel register numbering does
 ;; not match the gcc register numbering, so the pair must be CX:BX.
-;; That said, in order to take advantage of possible lower-subreg opts,
-;; treat all of the integral operands in the same way.
 
 (define_mode_attr doublemodesuffix [(SI "8") (DI "16")])
 
 (define_insn "atomic_compare_and_swap<dwi>_doubleword"
-  [(set (match_operand:DWIH 0 "register_operand" "=a")
-	(unspec_volatile:DWIH
-	  [(match_operand:<DWI> 2 "memory_operand" "+m")
-	   (match_operand:DWIH 3 "register_operand" "0")
-	   (match_operand:DWIH 4 "register_operand" "1")
-	   (match_operand:DWIH 5 "register_operand" "b")
-	   (match_operand:DWIH 6 "register_operand" "c")
-	   (match_operand:SI 7 "const_int_operand")]
+  [(set (match_operand:<DWI> 0 "register_operand" "=A")
+	(unspec_volatile:<DWI>
+	  [(match_operand:<DWI> 1 "memory_operand" "+m")
+	   (match_operand:<DWI> 2 "register_operand" "0")
+	   (match_operand:DWIH 3 "register_operand" "b")
+	   (match_operand:DWIH 4 "register_operand" "c")
+	   (match_operand:SI 5 "const_int_operand")]
 	  UNSPECV_CMPXCHG))
-   (set (match_operand:DWIH 1 "register_operand" "=d")
-	(unspec_volatile:DWIH [(const_int 0)] UNSPECV_CMPXCHG))
-   (set (match_dup 2)
+   (set (match_dup 1)
 	(unspec_volatile:<DWI> [(const_int 0)] UNSPECV_CMPXCHG))
    (set (reg:CCZ FLAGS_REG)
         (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG))]
   "TARGET_CMPXCHG<doublemodesuffix>B"
-  "lock{%;} %K7cmpxchg<doublemodesuffix>b\t%2")
+  "lock{%;} %K5cmpxchg<doublemodesuffix>b\t%1")
 
 ;; For operand 2 nonmemory_operand predicate is used instead of
 ;; register_operand to allow combiner to better optimize atomic