diff mbox

[Aarch64,2/3] Use the atomic compare-and-swap instructions when available.

Message ID 55CB1CBD.9030204@foss.arm.com
State New
Headers show

Commit Message

Matthew Wahab Aug. 12, 2015, 10:15 a.m. UTC
ARMv8.1 adds instructions for atomic compare-and-swap with optional memory
ordering specifiers. This patch adds the compare-and-swap instructions and
changes the atomic_compare_and_swap patterns to use them.

The changes to the atomic_compare_and_swap pattern make the new
instructions available as an alternative for the existing expander, to
be used when the target supports them. The existing expander is reused
so that it can generate code needed to meet the requirements of the
atomic_compare_and_swap name.

Using the atomic CAS instructions, the code generated for a call to
__atomic_compare_exchange (ptr, expected, desired, weak, smodel, fmodel)
becomes:

   mov r, r1
   cas<smo><sz> r, r2, [r0]
   cmp r, r1
   cset r0, eq
   cbnz r0, L
   strb r, [r1]
L:
   ret

where
   r0 = ptr, r1 = *expected, r2 = *desired,
   r is some temporary.
   smo is one of {'', 'a', 'l', 'al'}, depending on smodel
   sz is one of {'', 'b', 'h'}, depending on the data size.

Tested the series for aarch64-none-linux-gnu with native bootstrap and make
check and for aarch64-none-elf with cross-compiled check-gcc. Also tested
aarch64-none-elf with cross-compiled check-gcc on an emulator that supports
ARMv8.1.

Ok for trunk?
Matthew

2015-08-12  Matthew Wahab  <matthew.wahab@arm.com>

	* config/aarch64/aarch64-protos.h
	(aarch64_gen_atomic_cas): Declare.
	* config/aarch64/aarch64.c (aarch64_expand_compare_and_swap):
	Choose appropriate instruction pattern for the target.
	(aarch64_gen_atomic_cas): New.
	* config/aarch64/atomics.md (UNSPECV_ATOMIC_CAS): New.
	(atomic_compare_and_swap<mode>_1): Rename to
	aarch64_compare_and_swap<mode>.  Fix some indentation.
	(aarch64_compare_and_swap<mode>_lse): New.
	(aarch64_atomic_cas<mode>): New.

Comments

James Greenhalgh Aug. 12, 2015, 11:18 a.m. UTC | #1
On Wed, Aug 12, 2015 at 11:15:25AM +0100, Matthew Wahab wrote:
> ARMv8.1 adds instructions for atomic compare-and-swap with optional memory
> ordering specifiers. This patch adds the compare-and-swap instructions and
> changes the atomic_compare_and_swap patterns to use them
> 
> The changes to the atomic_compare_and_swap pattern makes the new
> instructions available as an alternative for the existing expander, to
> be used when the target supports them. The existing expander is reused
> so that it can generate code needed to meet the requirements of the
> atomic_compare_and_swap name.
> 
> Using the atomic CAS instructions, the code generated for a call to
> __atomic_compare_exchange (ptr, expected, desired, weak, smodel, fmodel)
> becomes:
> 
>    mov r, r1
>    cas<smo><sz> r, r2, [r0]
>    cmp r, r1
>    cset r0, eq
>    cbnz r0, L
>    strb r, [r1]
> L:
>    ret
> 
> where
>    r0 = ptr, r1 = *expected, r2 = *desired,
>    r is some temporary.
>    mo is one of {'', 'a', 'l', 'al'}, depending on smodel
>    sz is one of {'', 'b', 'h'} depending on the data size.
> 
> Tested the series for aarch64-none-linux-gnu with native bootstrap and make
> check and for aarch64-none-elf with cross-compiled check-gcc. Also tested
> aarch64-none-elf with cross-compiled check-gcc on an emulator that supports
> ARMv8.1.
> 
> Ok for trunk?
> Matthew

OK, but please fix up the testcases Andrew mentioned for those configuring
their toolchains/test runs with a default -mcpu or -march which targets
these instructions. I would just hardwire them
(with dg-additional-options) to -march=armv8-a.

Thanks,
James

> 2015-08-12  Matthew Wahab  <matthew.wahab@arm.com>
> 
> 	* config/aarch64/aarch64-protos.h
> 	(aarch64_gen_atomic_cas): Declare.
> 	* config/aarch64/aarch64.c (aarch64_expand_compare_and_swap):
> 	Choose appropriate instruction pattern for the target.
> 	(aarch64_gen_atomic_cas): New.
> 	* config/aarch64/atomics.md (UNSPECV_ATOMIC_CAS): New.
> 	(atomic_compare_and_swap<mode>_1): Rename to
> 	aarch64_compare_and_swap<mode>.  Fix some indentation.
> 	(aarch64_compare_and_swap<mode>_lse): New.
> 	(aarch64_atomic_cas<mode>): New.
>
diff mbox

Patch

From a84d8f8202a30fca18ed79e617d5ba3422eb021f Mon Sep 17 00:00:00 2001
From: Matthew Wahab <matthew.wahab@arm.com>
Date: Mon, 10 Aug 2015 16:59:18 +0100
Subject: [PATCH 2/3] Add and use atomic CAS instruction.

Change-Id: Ie40f345d414fc9dc6c8cbac0eb9457547aa9ec2d
---
 gcc/config/aarch64/aarch64-protos.h |   1 +
 gcc/config/aarch64/aarch64.c        |  66 ++++++++++++++++++--
 gcc/config/aarch64/atomics.md       | 117 +++++++++++++++++++++++++++++++++---
 3 files changed, 172 insertions(+), 12 deletions(-)

diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 32b5d09..0b09d49 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -362,6 +362,7 @@  rtx aarch64_load_tp (rtx);
 
 void aarch64_expand_compare_and_swap (rtx op[]);
 void aarch64_split_compare_and_swap (rtx op[]);
+void aarch64_gen_atomic_cas (rtx, rtx, rtx, rtx, rtx);
 void aarch64_split_atomic_op (enum rtx_code, rtx, rtx, rtx, rtx, rtx, rtx);
 
 bool aarch64_gen_adjusted_ldpstp (rtx *, bool, enum machine_mode, RTX_CODE);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 6da7245..259e049 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -10749,7 +10749,23 @@  aarch64_expand_compare_and_swap (rtx operands[])
 {
   rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
   machine_mode mode, cmp_mode;
-  rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
+  typedef rtx (*gen_cas_fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
+  int idx;
+  gen_cas_fn gen;
+  const gen_cas_fn split_cas[] =
+  {
+    gen_aarch64_compare_and_swapqi,
+    gen_aarch64_compare_and_swaphi,
+    gen_aarch64_compare_and_swapsi,
+    gen_aarch64_compare_and_swapdi
+  };
+  const gen_cas_fn atomic_cas[] =
+  {
+    gen_aarch64_compare_and_swapqi_lse,
+    gen_aarch64_compare_and_swaphi_lse,
+    gen_aarch64_compare_and_swapsi_lse,
+    gen_aarch64_compare_and_swapdi_lse
+  };
 
   bval = operands[0];
   rval = operands[1];
@@ -10794,13 +10810,17 @@  aarch64_expand_compare_and_swap (rtx operands[])
 
   switch (mode)
     {
-    case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
-    case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
-    case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
-    case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
+    case QImode: idx = 0; break;
+    case HImode: idx = 1; break;
+    case SImode: idx = 2; break;
+    case DImode: idx = 3; break;
     default:
       gcc_unreachable ();
     }
+  if (TARGET_LSE)
+    gen = atomic_cas[idx];
+  else
+    gen = split_cas[idx];
 
   emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
 
@@ -10829,6 +10849,42 @@  aarch64_emit_post_barrier (enum memmodel model)
     }
 }
 
+/* Emit an atomic compare-and-swap operation.  RVAL is the destination register
+   for the data in memory.  EXPECTED is the value expected to be in memory.
+   DESIRED is the value to store to memory.  MEM is the memory location.  MODEL
+   is the memory ordering to use.  */
+
+void
+aarch64_gen_atomic_cas (rtx rval, rtx mem,
+			rtx expected, rtx desired,
+			rtx model)
+{
+  rtx (*gen) (rtx, rtx, rtx, rtx);
+  machine_mode mode;
+
+  mode = GET_MODE (mem);
+
+  switch (mode)
+    {
+    case QImode: gen = gen_aarch64_atomic_casqi; break;
+    case HImode: gen = gen_aarch64_atomic_cashi; break;
+    case SImode: gen = gen_aarch64_atomic_cassi; break;
+    case DImode: gen = gen_aarch64_atomic_casdi; break;
+    default:
+      gcc_unreachable ();
+    }
+
+  /* Move the expected value into the CAS destination register.  */
+  emit_insn (gen_rtx_SET (rval, expected));
+
+  /* Emit the CAS.  */
+  emit_insn (gen (rval, mem, desired, model));
+
+  /* Compare the expected value with the value loaded by the CAS, to establish
+     whether the swap was made.  */
+  aarch64_gen_compare_reg (EQ, rval, expected);
+}
+
 /* Split a compare and swap pattern.  */
 
 void
diff --git a/gcc/config/aarch64/atomics.md b/gcc/config/aarch64/atomics.md
index 1a38ac0..7082f61 100644
--- a/gcc/config/aarch64/atomics.md
+++ b/gcc/config/aarch64/atomics.md
@@ -26,6 +26,7 @@ 
     UNSPECV_STL				; Represent an atomic store or store-release.
     UNSPECV_ATOMIC_CMPSW		; Represent an atomic compare swap.
     UNSPECV_ATOMIC_EXCHG		; Represent an atomic exchange.
+    UNSPECV_ATOMIC_CAS			; Represent an atomic CAS.
     UNSPECV_ATOMIC_OP			; Represent an atomic operation.
 ])
 
@@ -45,10 +46,10 @@ 
   }
 )
 
-(define_insn_and_split "atomic_compare_and_swap<mode>_1"
+(define_insn_and_split "aarch64_compare_and_swap<mode>"
   [(set (reg:CC CC_REGNUM)					;; bool out
     (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
-   (set (match_operand:SI 0 "register_operand" "=&r")		;; val out
+   (set (match_operand:SI 0 "register_operand" "=&r")	   ;; val out
     (zero_extend:SI
       (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory
    (set (match_dup 1)
@@ -57,7 +58,7 @@ 
        (match_operand:SHORT 3 "register_operand" "r")	;; desired
        (match_operand:SI 4 "const_int_operand")		;; is_weak
        (match_operand:SI 5 "const_int_operand")		;; mod_s
-       (match_operand:SI 6 "const_int_operand")]		;; mod_f
+       (match_operand:SI 6 "const_int_operand")]	;; mod_f
       UNSPECV_ATOMIC_CMPSW))
    (clobber (match_scratch:SI 7 "=&r"))]
   ""
@@ -70,17 +71,17 @@ 
   }
 )
 
-(define_insn_and_split "atomic_compare_and_swap<mode>_1"
+(define_insn_and_split "aarch64_compare_and_swap<mode>"
   [(set (reg:CC CC_REGNUM)					;; bool out
     (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
    (set (match_operand:GPI 0 "register_operand" "=&r")		;; val out
-    (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory
+    (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q"))   ;; memory
    (set (match_dup 1)
     (unspec_volatile:GPI
       [(match_operand:GPI 2 "aarch64_plus_operand" "rI")	;; expect
        (match_operand:GPI 3 "register_operand" "r")		;; desired
-       (match_operand:SI 4 "const_int_operand")		;; is_weak
-       (match_operand:SI 5 "const_int_operand")		;; mod_s
+       (match_operand:SI 4 "const_int_operand")			;; is_weak
+       (match_operand:SI 5 "const_int_operand")			;; mod_s
        (match_operand:SI 6 "const_int_operand")]		;; mod_f
       UNSPECV_ATOMIC_CMPSW))
    (clobber (match_scratch:SI 7 "=&r"))]
@@ -94,6 +95,57 @@ 
   }
 )
 
+(define_insn_and_split "aarch64_compare_and_swap<mode>_lse"
+  [(set (reg:CC CC_REGNUM)					;; bool out
+    (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
+   (set (match_operand:SI 0 "register_operand" "=&r")		;; val out
+    (zero_extend:SI
+      (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q"))) ;; memory
+   (set (match_dup 1)
+    (unspec_volatile:SHORT
+      [(match_operand:SI 2 "aarch64_plus_operand" "rI")	;; expected
+       (match_operand:SHORT 3 "register_operand" "r")	;; desired
+       (match_operand:SI 4 "const_int_operand")		;; is_weak
+       (match_operand:SI 5 "const_int_operand")		;; mod_s
+       (match_operand:SI 6 "const_int_operand")]	;; mod_f
+      UNSPECV_ATOMIC_CMPSW))]
+  "TARGET_LSE"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_gen_atomic_cas (operands[0], operands[1],
+			    operands[2], operands[3],
+			    operands[5]);
+    DONE;
+  }
+)
+
+(define_insn_and_split "aarch64_compare_and_swap<mode>_lse"
+  [(set (reg:CC CC_REGNUM)					;; bool out
+    (unspec_volatile:CC [(const_int 0)] UNSPECV_ATOMIC_CMPSW))
+   (set (match_operand:GPI 0 "register_operand" "=&r")		;; val out
+    (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q"))   ;; memory
+   (set (match_dup 1)
+    (unspec_volatile:GPI
+      [(match_operand:GPI 2 "aarch64_plus_operand" "rI")	;; expect
+       (match_operand:GPI 3 "register_operand" "r")		;; desired
+       (match_operand:SI 4 "const_int_operand")			;; is_weak
+       (match_operand:SI 5 "const_int_operand")			;; mod_s
+       (match_operand:SI 6 "const_int_operand")]		;; mod_f
+      UNSPECV_ATOMIC_CMPSW))]
+  "TARGET_LSE"
+  "#"
+  "&& reload_completed"
+  [(const_int 0)]
+  {
+    aarch64_gen_atomic_cas (operands[0], operands[1],
+			    operands[2], operands[3],
+			    operands[5]);
+    DONE;
+  }
+)
+
 (define_insn_and_split "atomic_exchange<mode>"
   [(set (match_operand:ALLI 0 "register_operand" "=&r")		;; output
     (match_operand:ALLI 1 "aarch64_sync_memory_operand" "+Q")) ;; memory
@@ -370,3 +422,54 @@ 
       return "dmb\\tish";
   }
 )
+
+;; ARMv8.1 LSE instructions.
+
+;; Atomic compare-and-swap: HI and smaller modes.
+
+(define_insn "aarch64_atomic_cas<mode>"
+ [(set (match_operand:SI 0 "register_operand" "+&r")		  ;; out
+   (zero_extend:SI
+    (match_operand:SHORT 1 "aarch64_sync_memory_operand" "+Q")))  ;; memory.
+  (set (match_dup 1)
+   (unspec_volatile:SHORT
+    [(match_dup 0)
+     (match_operand:SHORT 2 "register_operand" "r")	;; value.
+     (match_operand:SI 3 "const_int_operand" "")]	;; model.
+    UNSPECV_ATOMIC_CAS))]
+ "TARGET_LSE && reload_completed"
+{
+  enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
+  if (is_mm_relaxed (model))
+    return "cas<atomic_sfx>\t%<w>0, %<w>2, %1";
+  else if (is_mm_acquire (model) || is_mm_consume (model))
+    return "casa<atomic_sfx>\t%<w>0, %<w>2, %1";
+  else if (is_mm_release (model))
+    return "casl<atomic_sfx>\t%<w>0, %<w>2, %1";
+  else
+    return "casal<atomic_sfx>\t%<w>0, %<w>2, %1";
+})
+
+;; Atomic compare-and-swap: SI and larger modes.
+
+(define_insn "aarch64_atomic_cas<mode>"
+ [(set (match_operand:GPI 0 "register_operand" "+&r")	      ;; out
+   (match_operand:GPI 1 "aarch64_sync_memory_operand" "+Q"))  ;; memory.
+  (set (match_dup 1)
+   (unspec_volatile:GPI
+    [(match_dup 0)
+     (match_operand:GPI 2 "register_operand" "r")	;; value.
+     (match_operand:SI 3 "const_int_operand" "")]	;; model.
+    UNSPECV_ATOMIC_CAS))]
+  "TARGET_LSE && reload_completed"
+{
+  enum memmodel model = memmodel_from_int (INTVAL (operands[3]));
+  if (is_mm_relaxed (model))
+    return "cas<atomic_sfx>\t%<w>0, %<w>2, %1";
+  else if (is_mm_acquire (model) || is_mm_consume (model))
+    return "casa<atomic_sfx>\t%<w>0, %<w>2, %1";
+  else if (is_mm_release (model))
+    return "casl<atomic_sfx>\t%<w>0, %<w>2, %1";
+  else
+    return "casal<atomic_sfx>\t%<w>0, %<w>2, %1";
+})
-- 
1.9.1