Patchwork [2/5] arm: Emit swp for pre-armv6.

login
register
mail settings
Submitter Richard Henderson
Date Dec. 1, 2011, 12:44 a.m.
Message ID <1322700249-4693-3-git-send-email-rth@redhat.com>
Download mbox | patch
Permalink /patch/128614/
State New
Headers show

Comments

Richard Henderson - Dec. 1, 2011, 12:44 a.m.
---
 gcc/config/arm/arm.h   |    6 ++++
 gcc/config/arm/sync.md |   63 +++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 68 insertions(+), 1 deletions(-)
Richard Earnshaw - Dec. 1, 2011, 10:59 a.m.
Sorry, no.

It's essential we don't emit SWP instructions directly into code on any
platform where that code may possibly be run on a later core that has
LDREX/STREX.  If we do that we'll end up with a mess that can't be resolved.

In the absence of known OS helper functions the only solution to this is
to call library helper functions that can be replaced once to fix the
whole application (including any shared libraries if applicable).  There
cannot be a mix of the two strategies.

I also think that GCC should NOT provide those helper functions, though
we should probably write a document describing how a user might do so.

The model we've been using on Linux is, I think, the only viable one: if
we have LDREX/STREX in the instruction set, then we use it directly;
otherwise we call a library function.  The system developer must then
ensure that their helper function does the "right thing" to make all the
code work.

R.

On 01/12/11 00:44, Richard Henderson wrote:
> ---
>  gcc/config/arm/arm.h   |    6 ++++
>  gcc/config/arm/sync.md |   63 +++++++++++++++++++++++++++++++++++++++++++++++-
>  2 files changed, 68 insertions(+), 1 deletions(-)
> 
> diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
> index 31f4856..33e5b8e 100644
> --- a/gcc/config/arm/arm.h
> +++ b/gcc/config/arm/arm.h
> @@ -276,6 +276,12 @@ extern void (*arm_lang_output_object_attributes_hook)(void);
>  /* Nonzero if this chip implements a memory barrier instruction.  */
>  #define TARGET_HAVE_MEMORY_BARRIER (TARGET_HAVE_DMB || TARGET_HAVE_DMB_MCR)
>  
> +/* Nonzero if this chip supports swp and swpb.  These are technically present
> +   post-armv6, but deprecated.  Never use it if we have OS support, as swp is
> +   not well-defined on SMP systems.  */
> +#define TARGET_HAVE_SWP \
> +  (TARGET_ARM && arm_arch4 && !arm_arch6 && arm_abi != ARM_ABI_AAPCS_LINUX)
> +
>  /* Nonzero if this chip supports ldrex and strex */
>  #define TARGET_HAVE_LDREX	((arm_arch6 && TARGET_ARM) || arm_arch7)
>  
> diff --git a/gcc/config/arm/sync.md b/gcc/config/arm/sync.md
> index 124ebf0..72e7181 100644
> --- a/gcc/config/arm/sync.md
> +++ b/gcc/config/arm/sync.md
> @@ -26,6 +26,10 @@
>     (DI "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN
>  	&& TARGET_HAVE_MEMORY_BARRIER")])
>  
> +(define_mode_attr swp_predtab
> +  [(QI "TARGET_HAVE_SWP") (HI "false")
> +   (SI "TARGET_HAVE_SWP") (DI "false")])
> +
>  (define_code_iterator syncop [plus minus ior xor and])
>  
>  (define_code_attr sync_optab
> @@ -132,7 +136,41 @@
>      DONE;
>    })
>  
> -(define_insn_and_split "atomic_exchange<mode>"
> +(define_expand "atomic_exchange<mode>"
> +  [(match_operand:QHSD 0 "s_register_operand" "")
> +   (match_operand:QHSD 1 "mem_noofs_operand" "")
> +   (match_operand:QHSD 2 "s_register_operand" "r")
> +   (match_operand:SI 3 "const_int_operand" "")]
> +  "<sync_predtab> || <swp_predtab>"
> +{
> +  if (<sync_predtab>)
> +    emit_insn (gen_atomic_exchange<mode>_rex (operands[0], operands[1],
> +					      operands[2], operands[3]));
> +  else
> +    {
> +      /* Memory barriers are introduced in armv6, which also gains the
> +	 ldrex insns.  Therefore we can ignore the memory model argument
> +	 when issuing a SWP instruction.  */
> +      gcc_checking_assert (!TARGET_HAVE_MEMORY_BARRIER);
> +
> +      if (<MODE>mode == QImode)
> +	{
> +	  rtx x = gen_reg_rtx (SImode);
> +          emit_insn (gen_atomic_exchangeqi_swp (x, operands[1], operands[2]));
> +	  emit_move_insn (operands[0], gen_lowpart (QImode, x));
> +	}
> +      else if (<MODE>mode == SImode)
> +	{
> +	  emit_insn (gen_atomic_exchangesi_swp
> +		     (operands[0], operands[1], operands[2]));
> +	}
> +      else
> +	gcc_unreachable ();
> +    }
> +  DONE;
> +})
> +
> +(define_insn_and_split "atomic_exchange<mode>_rex"
>    [(set (match_operand:QHSD 0 "s_register_operand" "=&r")	;; output
>  	(match_operand:QHSD 1 "mem_noofs_operand" "+Ua"))	;; memory
>     (set (match_dup 1)
> @@ -152,6 +190,29 @@
>      DONE;
>    })
>  
> +(define_insn "atomic_exchangeqi_swp"
> +  [(set (match_operand:SI 0 "s_register_operand" "=&r")		;; output
> +	(zero_extend:SI
> +	  (match_operand:QI 1 "mem_noofs_operand" "+Ua")))	;; memory
> +   (set (match_dup 1)
> +	(unspec_volatile:QI
> +	  [(match_operand:QI 2 "s_register_operand" "r")]	;; input
> +	  VUNSPEC_ATOMIC_XCHG))]
> +  "TARGET_HAVE_SWP"
> +  "swpb%?\t%0, %2, %C1"
> +  [(set_attr "predicable" "yes")])
> +
> +(define_insn "atomic_exchangesi_swp"
> +  [(set (match_operand:SI 0 "s_register_operand" "=&r")		;; output
> +	(match_operand:SI 1 "mem_noofs_operand" "+Ua"))		;; memory
> +   (set (match_dup 1)
> +	(unspec_volatile:SI
> +	  [(match_operand:SI 2 "s_register_operand" "r")]	;; input
> +	  VUNSPEC_ATOMIC_XCHG))]
> +  "TARGET_HAVE_SWP"
> +  "swp%?\t%0, %2, %C1"
> +  [(set_attr "predicable" "yes")])
> +
>  (define_mode_attr atomic_op_operand
>    [(QI "reg_or_int_operand")
>     (HI "reg_or_int_operand")
Richard Henderson - Dec. 1, 2011, 3:49 p.m.
On 12/01/2011 02:59 AM, Richard Earnshaw wrote:
> It's essential we don't emit SWP instructions directly into code on any
> platform where that code may possibly be run on a later core that has
> LDREX/STREX.  If we do that we'll end up with a mess that can't be resolved.

Ok.  It's easy enough to drop that patch.

> I also think that GCC should NOT provide those helper functions, though
> we should probably write a document describing how a user might do so.

I'll refer you to MacLeod at this point and the atomics support library...


r~
Andrew MacLeod - Dec. 1, 2011, 4:59 p.m.
On 12/01/2011 10:49 AM, Richard Henderson wrote:
> On 12/01/2011 02:59 AM, Richard Earnshaw wrote:
>> It's essential we don't emit SWP instructions directly into code on any
>> platform where that code may possibly be run on a later core that has
>> LDREX/STREX.  If we do that we'll end up with a mess that can't be resolved.
> Ok.  It's easy enough to drop that patch.
>
>> I also think that GCC should NOT provide those helper functions, though
>> we should probably write a document describing how a user might do so.
> I'll refer you to MacLeod at this point and the atomics support library...
>

What helper functions?  The __atomic_* ones, used when lock-free routines
cannot be provided?  They are defined here:
http://gcc.gnu.org/wiki/Atomic/GCCMM/LIbrary

and recently I tried starting a discussion on its future:

http://gcc.gnu.org/ml/gcc/2011-11/msg00503.html

follow up any opinions there I guess :-)

Andrew

Patch

diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 31f4856..33e5b8e 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -276,6 +276,12 @@  extern void (*arm_lang_output_object_attributes_hook)(void);
 /* Nonzero if this chip implements a memory barrier instruction.  */
 #define TARGET_HAVE_MEMORY_BARRIER (TARGET_HAVE_DMB || TARGET_HAVE_DMB_MCR)
 
+/* Nonzero if this chip supports swp and swpb.  These are technically present
+   post-armv6, but deprecated.  Never use it if we have OS support, as swp is
+   not well-defined on SMP systems.  */
+#define TARGET_HAVE_SWP \
+  (TARGET_ARM && arm_arch4 && !arm_arch6 && arm_abi != ARM_ABI_AAPCS_LINUX)
+
 /* Nonzero if this chip supports ldrex and strex */
 #define TARGET_HAVE_LDREX	((arm_arch6 && TARGET_ARM) || arm_arch7)
 
diff --git a/gcc/config/arm/sync.md b/gcc/config/arm/sync.md
index 124ebf0..72e7181 100644
--- a/gcc/config/arm/sync.md
+++ b/gcc/config/arm/sync.md
@@ -26,6 +26,10 @@ 
    (DI "TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN
 	&& TARGET_HAVE_MEMORY_BARRIER")])
 
+(define_mode_attr swp_predtab
+  [(QI "TARGET_HAVE_SWP") (HI "false")
+   (SI "TARGET_HAVE_SWP") (DI "false")])
+
 (define_code_iterator syncop [plus minus ior xor and])
 
 (define_code_attr sync_optab
@@ -132,7 +136,41 @@ 
     DONE;
   })
 
-(define_insn_and_split "atomic_exchange<mode>"
+(define_expand "atomic_exchange<mode>"
+  [(match_operand:QHSD 0 "s_register_operand" "")
+   (match_operand:QHSD 1 "mem_noofs_operand" "")
+   (match_operand:QHSD 2 "s_register_operand" "r")
+   (match_operand:SI 3 "const_int_operand" "")]
+  "<sync_predtab> || <swp_predtab>"
+{
+  if (<sync_predtab>)
+    emit_insn (gen_atomic_exchange<mode>_rex (operands[0], operands[1],
+					      operands[2], operands[3]));
+  else
+    {
+      /* Memory barriers are introduced in armv6, which also gains the
+	 ldrex insns.  Therefore we can ignore the memory model argument
+	 when issuing a SWP instruction.  */
+      gcc_checking_assert (!TARGET_HAVE_MEMORY_BARRIER);
+
+      if (<MODE>mode == QImode)
+	{
+	  rtx x = gen_reg_rtx (SImode);
+          emit_insn (gen_atomic_exchangeqi_swp (x, operands[1], operands[2]));
+	  emit_move_insn (operands[0], gen_lowpart (QImode, x));
+	}
+      else if (<MODE>mode == SImode)
+	{
+	  emit_insn (gen_atomic_exchangesi_swp
+		     (operands[0], operands[1], operands[2]));
+	}
+      else
+	gcc_unreachable ();
+    }
+  DONE;
+})
+
+(define_insn_and_split "atomic_exchange<mode>_rex"
   [(set (match_operand:QHSD 0 "s_register_operand" "=&r")	;; output
 	(match_operand:QHSD 1 "mem_noofs_operand" "+Ua"))	;; memory
    (set (match_dup 1)
@@ -152,6 +190,29 @@ 
     DONE;
   })
 
+(define_insn "atomic_exchangeqi_swp"
+  [(set (match_operand:SI 0 "s_register_operand" "=&r")		;; output
+	(zero_extend:SI
+	  (match_operand:QI 1 "mem_noofs_operand" "+Ua")))	;; memory
+   (set (match_dup 1)
+	(unspec_volatile:QI
+	  [(match_operand:QI 2 "s_register_operand" "r")]	;; input
+	  VUNSPEC_ATOMIC_XCHG))]
+  "TARGET_HAVE_SWP"
+  "swpb%?\t%0, %2, %C1"
+  [(set_attr "predicable" "yes")])
+
+(define_insn "atomic_exchangesi_swp"
+  [(set (match_operand:SI 0 "s_register_operand" "=&r")		;; output
+	(match_operand:SI 1 "mem_noofs_operand" "+Ua"))		;; memory
+   (set (match_dup 1)
+	(unspec_volatile:SI
+	  [(match_operand:SI 2 "s_register_operand" "r")]	;; input
+	  VUNSPEC_ATOMIC_XCHG))]
+  "TARGET_HAVE_SWP"
+  "swp%?\t%0, %2, %C1"
+  [(set_attr "predicable" "yes")])
+
 (define_mode_attr atomic_op_operand
   [(QI "reg_or_int_operand")
    (HI "reg_or_int_operand")