diff mbox

[i386,RFC] HLE support in GCC

Message ID CAFULd4a+rCzKUwax1XKbXfhsokKdz9sPrsY_AQUkUDTy0JQeCg@mail.gmail.com
State New
Headers show

Commit Message

Uros Bizjak April 10, 2012, 5:05 p.m. UTC
On Tue, Apr 10, 2012 at 4:20 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> On Tue, Apr 10, 2012 at 06:12:08PM +0400, Kirill Yukhin wrote:
>> Attached patch implements HLE support for __atomic_compare_exchange_n.
>
> The target hook is definitely not appropriate, just define it in
> ix86_target_macros in i386-c.c instead or so.

Also, I think it is better to pass the operand that holds the model
constant to the final insn and conditionally output xacquire/xrelease
based on the INTVAL of this operand.

Something like the attached patch, but probably with a helper function in i386.c.

Uros.
diff mbox

Patch

Index: config/i386/sync.md
===================================================================
--- config/i386/sync.md	(revision 186282)
+++ config/i386/sync.md	(working copy)
@@ -315,8 +315,9 @@ 
    (match_operand:SI 7 "const_int_operand")]	;; failure model
   "TARGET_CMPXCHG"
 {
-  emit_insn (gen_atomic_compare_and_swap_single<mode>
-	     (operands[1], operands[2], operands[3], operands[4]));
+  emit_insn
+   (gen_atomic_compare_and_swap_single<mode>
+    (operands[1], operands[2], operands[3], operands[4], operands[6]));
   ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
 		     const0_rtx);
   DONE;
@@ -344,8 +345,9 @@ 
 {
   if (<MODE>mode == DImode && TARGET_64BIT)
     {
-      emit_insn (gen_atomic_compare_and_swap_singledi
-		 (operands[1], operands[2], operands[3], operands[4]));
+      emit_insn
+       (gen_atomic_compare_and_swap_singledi
+	(operands[1], operands[2], operands[3], operands[4], operands[6]));
     }
   else
     {
@@ -370,7 +372,7 @@ 
 	mem = replace_equiv_address (mem, force_reg (Pmode, XEXP (mem, 0)));
 
       emit_insn (gen_atomic_compare_and_swap_double<mode>
-		 (lo_o, hi_o, mem, lo_e, hi_e, lo_n, hi_n));
+		 (lo_o, hi_o, mem, lo_e, hi_e, lo_n, hi_n, operands[6]));
     }
   ix86_expand_setcc (operands[0], EQ, gen_rtx_REG (CCZmode, FLAGS_REG),
 		     const0_rtx);
@@ -382,15 +384,24 @@ 
 	(unspec_volatile:SWI
 	  [(match_operand:SWI 1 "memory_operand" "+m")
 	   (match_operand:SWI 2 "register_operand" "0")
-	   (match_operand:SWI 3 "register_operand" "<r>")]
+	   (match_operand:SWI 3 "register_operand" "<r>")
+	   (match_operand:SI 4 "const_int_operand")]
 	  UNSPECV_CMPXCHG_1))
    (set (match_dup 1)
 	(unspec_volatile:SWI [(const_int 0)] UNSPECV_CMPXCHG_2))
    (set (reg:CCZ FLAGS_REG)
         (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_3))]
   "TARGET_CMPXCHG"
-  "lock{%;} cmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}")
+{
+  static char buf[128];
+  const char *hle
+    = (INTVAL (operands[4]) & 8) ? "xacquire " : "";
 
+  snprintf (buf, sizeof (buf),
+  	    "lock{%;} %scmpxchg{<imodesuffix>}\t{%3, %1|%1, %3}", hle);
+  return buf;
+})
+
 ;; For double-word compare and swap, we are obliged to play tricks with
 ;; the input newval (op5:op6) because the Intel register numbering does
 ;; not match the gcc register numbering, so the pair must be CX:BX.
@@ -403,7 +414,8 @@ 
 	   (match_operand:<DCASHMODE> 3 "register_operand" "0")
 	   (match_operand:<DCASHMODE> 4 "register_operand" "1")
 	   (match_operand:<DCASHMODE> 5 "register_operand" "b")
-	   (match_operand:<DCASHMODE> 6 "register_operand" "c")]
+	   (match_operand:<DCASHMODE> 6 "register_operand" "c")
+	   (match_operand:SI 7 "const_int_operand")]
 	  UNSPECV_CMPXCHG_1))
    (set (match_operand:<DCASHMODE> 1 "register_operand" "=d")
 	(unspec_volatile:<DCASHMODE> [(const_int 0)] UNSPECV_CMPXCHG_2))
@@ -412,8 +424,16 @@ 
    (set (reg:CCZ FLAGS_REG)
         (unspec_volatile:CCZ [(const_int 0)] UNSPECV_CMPXCHG_4))]
   ""
-  "lock{%;} cmpxchg<doublemodesuffix>b\t%2")
+{
+  static char buf[128];
+  const char *hle
+    = (INTVAL (operands[7]) & 8) ? "xacquire " : "";
 
+  snprintf (buf, sizeof (buf),
+  	    "lock{%;} %scmpxchg<doublemodesuffix>b\t%2", hle);
+  return buf;
+})
+
 ;; Theoretically we'd like to use constraint "r" (any reg) for op5,
 ;; but that includes ecx.  If op5 and op6 are the same (like when
 ;; the input is -1LL) GCC might chose to allocate op5 to ecx, like