diff mbox

[cxx-mem-model] i386 atomic load/store

Message ID 4EAF7B5E.5050700@twiddle.net
State New
Headers show

Commit Message

Richard Henderson Nov. 1, 2011, 4:53 a.m. UTC
I'm considering the following.  Does anyone believe this i386/i486 decision
re DImode is a mistake?  Should I limit that to Pentium by checking cmpxchg?


r~
diff mbox

Patch

diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 7ce57d8..7d28e43 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -248,6 +248,9 @@ 
   ;; For BMI2 support
   UNSPEC_PDEP
   UNSPEC_PEXT
+
+  ;; For __atomic support
+  UNSPEC_MOVA
 ])
 
 (define_c_enum "unspecv" [
diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index e5579b1..da08e92 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -46,6 +46,88 @@ 
   "lock{%;} or{l}\t{$0, (%%esp)|DWORD PTR [esp], 0}"
   [(set_attr "memory" "unknown")])
 
+;; ??? From volume 3 section 7.1.1 Guaranteed Atomic Operations,
+;; Only beginning at Pentium family processors do we get any guarantee of
+;; atomicity in aligned 64-bit quantities.  Beginning at P6, we get a
+;; guarantee for 64-bit accesses that do not cross a cacheline boundary.
+;; This distinction is ignored below, since I *suspect* that FSTLL will
+;; appear atomic from the point of view of user-level threads even back
+;; on the 80386; I suspect that the non-atomicity can only be seen from
+;; other bus-level devices.
+;;
+;; Importantly, *no* processor makes atomicity guarantees for larger
+;; accesses.  In particular, there's no way to perform an atomic TImode
+;; move, despite the apparent applicability of MOVDQA et al.
+
+(define_mode_iterator ATOMIC  ; modes with a guaranteed single-insn atomic move
+   [QI HI SI (DI "TARGET_64BIT || TARGET_80387 || TARGET_SSE")])  ; DI needs x86-64, the FPU, or SSE to move 64 bits at once
+
+(define_expand "atomic_load<mode>"
+  [(set (match_operand:ATOMIC 0 "register_operand" "")
+	(unspec:ATOMIC [(match_operand:ATOMIC 1 "memory_operand" "")
+			(match_operand:SI 2 "const_int_operand" "")]
+		       UNSPEC_MOVA))]
+  ""
+{
+  /* For DImode on 32-bit, use the FPU/SSE to perform the 64-bit load.  */
+  if (<MODE>mode == DImode && !TARGET_64BIT)
+    emit_insn (gen_atomic_loaddi_fpu (operands[0], operands[1]));
+  else
+    emit_move_insn (operands[0], operands[1]);
+  DONE;
+})
+
+(define_insn_and_split "atomic_loaddi_fpu"  ; 64-bit atomic load via an FP or SSE register on 32-bit targets
+  [(set (match_operand:DI 0 "register_operand" "=fx")
+	(unspec:DI [(match_operand:DI 1 "memory_operand" "m")]
+		   UNSPEC_MOVA))]
+  "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
+  "#"  ; never emitted directly; always split
+  "&& reload_completed"
+  [(set (match_dup 0) (match_dup 1))])  ; after reload, reduces to a plain DImode move into the FP/SSE reg
+
+(define_expand "atomic_store<mode>"
+  [(set (match_operand:ATOMIC 0 "memory_operand" "")
+	(unspec:ATOMIC [(match_operand:ATOMIC 1 "register_operand" "")
+			(match_operand:SI 2 "const_int_operand" "")]
+		       UNSPEC_MOVA))]
+  ""
+{
+  enum memmodel model = (enum memmodel) INTVAL (operands[2]);
+
+  if (<MODE>mode == DImode && !TARGET_64BIT)
+    {
+      /* For DImode on 32-bit, use the FPU/SSE to perform the 64-bit store.  */
+      emit_insn (gen_atomic_storedi_fpu (operands[0], operands[1]));
+      if (model == MEMMODEL_SEQ_CST)
+	emit_insn (gen_mem_thread_fence (operands[2]));
+    }
+  else
+    {
+      /* For non-seq-cst stores, a plain store suffices on x86.  */
+      if (model != MEMMODEL_SEQ_CST)
+	{
+	  emit_move_insn (operands[0], operands[1]);
+	  DONE;
+	}
+
+      /* For sequentially-consistent stores, use xchg (implicit full barrier).  */
+      emit_insn (gen_atomic_exchange<mode> (gen_reg_rtx (<MODE>mode),
+					    operands[0], operands[1],
+					    operands[2]));
+    }
+  DONE;
+})
+
+(define_insn_and_split "atomic_storedi_fpu"  ; 64-bit atomic store via an FP or SSE register on 32-bit targets
+  [(set (match_operand:DI 0 "memory_operand" "=m")
+	(unspec:DI [(match_operand:DI 1 "register_operand" "fx")]
+		   UNSPEC_MOVA))]
+  "!TARGET_64BIT && (TARGET_80387 || TARGET_SSE)"
+  "#"  ; never emitted directly; always split
+  "&& reload_completed"
+  [(set (match_dup 0) (match_dup 1))])  ; after reload, reduces to a plain DImode move from the FP/SSE reg
+
 (define_expand "atomic_compare_and_swap<mode>"
   [(match_operand:QI 0 "register_operand" "")		;; bool success output
    (match_operand:SWI124 1 "register_operand" "")	;; oldval output