Patchwork [2/2] Support __ATOMIC_HLE_RELEASE for __atomic_clear/store_n

login
register
mail settings
Submitter Uros Bizjak
Date Jan. 14, 2013, 7:21 p.m.
Message ID <CAFULd4ZKTmpJ5+-FkMYGV9Ee2QhT80VC9itF-3sDUeNC4djgKA@mail.gmail.com>
Download mbox | patch
Permalink /patch/211865/
State New
Headers show

Comments

Uros Bizjak - Jan. 14, 2013, 7:21 p.m.
On Mon, Jan 14, 2013 at 8:01 PM, Andi Kleen <andi@firstfloor.org> wrote:
>> >> This cannot happen, we reject code that sets both __HLE* flags.
>> >
>> > BTW I found more HLE bugs, it looks like some of the fetch_op_*
>> > patterns do not match always and fall back to cmpxchg, which
>> > does not generate HLE code correctly. Not fully sure what's
>> > wrong, can you spot any obvious problems? You changed the
>> >
>> > (define_insn "atomic_<logic><mode>"
>> >
>> > pattern last.
>>
>> I don't think this is a target problem, these insns work as expected
>> and are covered by extensive testsuite in gcc.target/i386/hle-*.c.
>
> Well the C++ test cases I wrote didn't work. It may be related to
> how complex the program is. Simple calls as in the original
> test suite seem to work.
>
> e.g.  instead of xacquire lock and ... it ended up with a cmpxchg loop
> (which I think is a fallback path). The cmpxchg loop didn't include
> a HLE prefix (and simply adding one is not enough, would need more
> changes for successful elision)
>
> Before HLE the cmpxchg code was correct, just somewhat inefficient.
> Even with HLE it is technically correct, just it'll never elide.

I'd start with the attached (mechanical) patch, which just blindly adds
masks wherever the memory model is checked. Please note that ATOMIC_HLE
modifies the high bits of the model, so the unmasked checks fail in the
presence of HLE modifiers.

Uros.

Patch

Index: emit-rtl.c
===================================================================
--- emit-rtl.c	(revision 195152)
+++ emit-rtl.c	(working copy)
@@ -6014,7 +6014,7 @@  insn_file (const_rtx insn)
 bool
 need_atomic_barrier_p (enum memmodel model, bool pre)
 {
-  switch (model)
+  switch (model & MEMMODEL_MASK)
     {
     case MEMMODEL_RELAXED:
     case MEMMODEL_CONSUME:
Index: optabs.c
===================================================================
--- optabs.c	(revision 195152)
+++ optabs.c	(working copy)
@@ -7008,9 +7008,9 @@  maybe_emit_sync_lock_test_and_set (rtx target, rtx
      exists, and the memory model is stronger than acquire, add a release 
      barrier before the instruction.  */
 
-  if (model == MEMMODEL_SEQ_CST
-      || model == MEMMODEL_RELEASE
-      || model == MEMMODEL_ACQ_REL)
+  if ((model & MEMMODEL_MASK) == MEMMODEL_SEQ_CST
+      || (model & MEMMODEL_MASK) == MEMMODEL_RELEASE
+      || (model & MEMMODEL_MASK) == MEMMODEL_ACQ_REL)
     expand_mem_thread_fence (model);
 
   if (icode != CODE_FOR_nothing)
@@ -7388,7 +7388,7 @@  expand_mem_thread_fence (enum memmodel model)
 {
   if (HAVE_mem_thread_fence)
     emit_insn (gen_mem_thread_fence (GEN_INT (model)));
-  else if (model != MEMMODEL_RELAXED)
+  else if ((model & MEMMODEL_MASK) != MEMMODEL_RELAXED)
     {
       if (HAVE_memory_barrier)
 	emit_insn (gen_memory_barrier ());
@@ -7412,7 +7412,7 @@  expand_mem_signal_fence (enum memmodel model)
 {
   if (HAVE_mem_signal_fence)
     emit_insn (gen_mem_signal_fence (GEN_INT (model)));
-  else if (model != MEMMODEL_RELAXED)
+  else if ((model & MEMMODEL_MASK) != MEMMODEL_RELAXED)
     {
       /* By default targets are coherent between a thread and the signal
 	 handler running on the same thread.  Thus this really becomes a
@@ -7467,7 +7467,7 @@  expand_atomic_load (rtx target, rtx mem, enum memm
     target = gen_reg_rtx (mode);
 
   /* For SEQ_CST, emit a barrier before the load.  */
-  if (model == MEMMODEL_SEQ_CST)
+  if ((model & MEMMODEL_MASK) == MEMMODEL_SEQ_CST)
     expand_mem_thread_fence (model);
 
   emit_move_insn (target, mem);
@@ -7513,7 +7513,7 @@  expand_atomic_store (rtx mem, rtx val, enum memmod
 	  if (maybe_expand_insn (icode, 2, ops))
 	    {
 	      /* lock_release is only a release barrier.  */
-	      if (model == MEMMODEL_SEQ_CST)
+	      if ((model & MEMMODEL_MASK) == MEMMODEL_SEQ_CST)
 		expand_mem_thread_fence (model);
 	      return const0_rtx;
 	    }
@@ -7540,7 +7540,7 @@  expand_atomic_store (rtx mem, rtx val, enum memmod
   emit_move_insn (mem, val);
 
   /* For SEQ_CST, also emit a barrier after the store.  */
-  if (model == MEMMODEL_SEQ_CST)
+  if ((model & MEMMODEL_MASK) == MEMMODEL_SEQ_CST)
     expand_mem_thread_fence (model);
 
   return const0_rtx;