Patchwork [RFA/ARM,2/3] Add vectorizer support for VFMA

login
register
mail settings
Submitter Matthew Gretton-Dann
Date June 25, 2012, 2:59 p.m.
Message ID <4FE87CB9.4080803@arm.com>
Download mbox | patch
Permalink /patch/167134/
State New
Headers show

Comments

Matthew Gretton-Dann - June 25, 2012, 2:59 p.m.
All,

This patch adds vectoriser support for VFMA to the ARM Neon backend.

Note that the VFP VFNMA and VFNMS instructions do not have Neon
equivalents.

OK?

gcc/ChangeLog:

2012-06-25  Matthew Gretton-Dann  <matthew.gretton-dann@arm.com>

	* config/arm/neon.md (fma<mode>4): New pattern.
	(*fmsub<mode>4): Likewise.

2012-06-25  Matthew Gretton-Dann  <matthew.gretton-dann@arm.com>

	* gcc.target/arm/neon-vfma-1.c: New testcase.
	* gcc.target/arm/neon-vfms-1.c: Likewise.
	* lib/target-supports.exp (add_options_for_arm_neonv2): New
	function.
	(check_effective_target_arm_neonv2_ok_nocache): Likewise.
	(check_effective_target_arm_neonv2_ok): Likewise.
	(check_effective_target_arm_neonv2_hw): Likewise.
	(check_effective_target_arm_neonv2): Likewise.

Thanks,

Matt
Richard Earnshaw - June 26, 2012, 1:59 p.m.
On 25/06/12 15:59, Matthew Gretton-Dann wrote:
> All,
> 
> This patch adds vectoriser support for VFMA to the ARM Neon backend.
> 
> Note that the VFP VFNMA and VFNMS instructions do not have Neon
> equivalents.
> 
> OK?

Sorry, no.  The neon versions of FMA do not handle denormalized values,
so this needs to reject vectorization unless
flag_unsafe_math_optimizations is true.

R.

> 
> gcc/ChangeLog:
> 
> 2012-06-25  Matthew Gretton-Dann  <matthew.gretton-dann@arm.com>
> 
> 	* config/arm/neon.md (fma<mode>4): New pattern.
> 	(*fmsub<mode>4): Likewise.
> 
> 2012-06-25  Matthew Gretton-Dann  <matthew.gretton-dann@arm.com>
> 
> 	* gcc.target/arm/neon-vfma-1.c: New testcase.
> 	* gcc.target/arm/neon-vfms-1.c: Likewise.
> 	* lib/target-supports.exp (add_options_for_arm_neonv2): New
> 	function.
> 	(check_effective_target_arm_neonv2_ok_nocache): Likewise.
> 	(check_effective_target_arm_neonv2_ok): Likewise.
> 	(check_effective_target_arm_neonv2_hw): Likewise.
> 	(check_effective_target_arm_neonv2): Likewise.
> 
> Thanks,
> 
> Matt
> 
> 
> 0002-Add-vectorizer-support-for-VFMA.txt
> 
> 
> diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
> index 4568dea..4d12fb3 100644
> --- a/gcc/config/arm/neon.md
> +++ b/gcc/config/arm/neon.md
> @@ -711,6 +711,33 @@
>                                      (const_string "neon_mla_qqq_32_qqd_32_scalar")))))]
>  )
>  
> +;; Fused multiply-accumulate
> +(define_insn "fma<mode>4"
> +  [(set (match_operand:VCVTF 0 "register_operand" "=w")
> +        (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
> +		 (match_operand:VCVTF 2 "register_operand" "w")
> +		 (match_operand:VCVTF 3 "register_operand" "0")))]
> +  "TARGET_NEON && TARGET_FMA"
> +  "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
> +  [(set (attr "neon_type")
> +	(if_then_else (match_test "<Is_d_reg>")
> +		      (const_string "neon_fp_vmla_ddd")
> +		      (const_string "neon_fp_vmla_qqq")))]
> +)
> +
> +(define_insn "*fmsub<mode>4"
> +  [(set (match_operand:VCVTF 0 "register_operand" "=w")
> +        (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
> +		   (match_operand:VCVTF 2 "register_operand" "w")
> +		   (match_operand:VCVTF 3 "register_operand" "0")))]
> +  "TARGET_NEON && TARGET_FMA"
> +  "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
> +  [(set (attr "neon_type")
> +	(if_then_else (match_test "<Is_d_reg>")
> +		      (const_string "neon_fp_vmla_ddd")
> +		      (const_string "neon_fp_vmla_qqq")))]
> +)
> +
>  (define_insn "ior<mode>3"
>    [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
>  	(ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
> diff --git a/gcc/testsuite/gcc.target/arm/neon-vfma-1.c b/gcc/testsuite/gcc.target/arm/neon-vfma-1.c
> new file mode 100644
> index 0000000..a003a82
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/neon-vfma-1.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_neonv2_ok } */
> +/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
> +/* { dg-add-options arm_neonv2 } */
> +/* { dg-final { scan-assembler "vfma\\.f32\[	\]+\[dDqQ]" } } */
> +
> +/* Verify that VFMA is used.  */
> +void f1(int n, float a, float x[], float y[]) {
> +  int i;
> +  for (i = 0; i < n; ++i)
> +    y[i] = a * x[i] + y[i];
> +}
> diff --git a/gcc/testsuite/gcc.target/arm/neon-vfms-1.c b/gcc/testsuite/gcc.target/arm/neon-vfms-1.c
> new file mode 100644
> index 0000000..8cefd8a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/neon-vfms-1.c
> @@ -0,0 +1,12 @@
> +/* { dg-do compile } */
> +/* { dg-require-effective-target arm_neonv2_ok } */
> +/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
> +/* { dg-add-options arm_neonv2 } */
> +/* { dg-final { scan-assembler "vfms\\.f32\[	\]+\[dDqQ]" } } */
> +
> +/* Verify that VFMS is used.  */
> +void f1(int n, float a, float x[], float y[]) {
> +  int i;
> +  for (i = 0; i < n; ++i)
> +    y[i] = a * -x[i] + y[i];
> +}
> diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
> index bc5baa7..9fc8a5c 100644
> --- a/gcc/testsuite/lib/target-supports.exp
> +++ b/gcc/testsuite/lib/target-supports.exp
> @@ -2082,6 +2082,19 @@ proc add_options_for_arm_neon { flags } {
>      return "$flags $et_arm_neon_flags"
>  }
>  
> +# Add the options needed for NEON.  We need either -mfloat-abi=softfp
> +# or -mfloat-abi=hard, but if one is already specified by the
> +# multilib, use it.  Similarly, if a -mfpu option already enables
> +# NEON, do not add -mfpu=neon.
> +
> +proc add_options_for_arm_neonv2 { flags } {
> +    if { ! [check_effective_target_arm_neonv2_ok] } {
> +	return "$flags"
> +    }
> +    global et_arm_neonv2_flags
> +    return "$flags $et_arm_neonv2_flags"
> +}
> +
>  # Return 1 if this is an ARM target supporting -mfpu=neon
>  # -mfloat-abi=softfp or equivalent options.  Some multilibs may be
>  # incompatible with these options.  Also set et_arm_neon_flags to the
> @@ -2110,6 +2123,38 @@ proc check_effective_target_arm_neon_ok { } {
>  		check_effective_target_arm_neon_ok_nocache]
>  }
>  
> +# Return 1 if this is an ARM target supporting -mfpu=neon-vfpv4
> +# -mfloat-abi=softfp or equivalent options.  Some multilibs may be
> +# incompatible with these options.  Also set et_arm_neonv2_flags to the
> +# best options to add.
> +
> +proc check_effective_target_arm_neonv2_ok_nocache { } {
> +    global et_arm_neonv2_flags
> +    set et_arm_neonv2_flags ""
> +    if { [check_effective_target_arm32] } {
> +	foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-vfpv4" "-mfpu=neon-vfpv4 -mfloat-abi=softfp"} {
> +	    if { [check_no_compiler_messages_nocache arm_neonv2_ok object {
> +		#include "arm_neon.h"
> +		float32x2_t 
> +		foo (float32x2_t a, float32x2_t b, float32x2_t c)
> +                {
> +                  return vfma_f32 (a, b, c);
> +                }
> +	    } "$flags"] } {
> +		set et_arm_neonv2_flags $flags
> +		return 1
> +	    }
> +	}
> +    }
> +
> +    return 0
> +}
> +
> +proc check_effective_target_arm_neonv2_ok { } {
> +    return [check_cached_effective_target arm_neonv2_ok \
> +		check_effective_target_arm_neonv2_ok_nocache]
> +}
> +
>  # Add the options needed for NEON.  We need either -mfloat-abi=softfp
>  # or -mfloat-abi=hard, but if one is already specified by the
>  # multilib, use it.
> @@ -2301,6 +2346,21 @@ proc check_effective_target_arm_neon_hw { } {
>      } [add_options_for_arm_neon ""]]
>  }
>  
> +proc check_effective_target_arm_neonv2_hw { } {
> +    return [check_runtime arm_neon_hwv2_available {
> +	#include "arm_neon.h"
> +	int
> +	main (void)
> +	{
> +	  float32x2_t a, b, c;
> +	  asm ("vfma.f32 %P0, %P1, %P2"
> +	       : "=w" (a)
> +	       : "w" (b), "w" (c));
> +	  return 0;
> +	}
> +    } [add_options_for_arm_neonv2 ""]]
> +}
> +
>  # Return 1 if this is a ARM target with NEON enabled.
>  
>  proc check_effective_target_arm_neon { } {
> @@ -2317,6 +2377,24 @@ proc check_effective_target_arm_neon { } {
>      }
>  }
>  
> +proc check_effective_target_arm_neonv2 { } {
> +    if { [check_effective_target_arm32] } {
> +	return [check_no_compiler_messages arm_neon object {
> +	    #ifndef __ARM_NEON__
> +	    #error not NEON
> +	    #else
> +	    #ifndef __ARM_FEATURE_FMA
> +	    #error not NEONv2
> +            #else
> +	    int dummy;
> +	    #endif
> +	    #endif
> +	}]
> +    } else {
> +	return 0
> +    }
> +}
> +
>  # Return 1 if this a Loongson-2E or -2F target using an ABI that supports
>  # the Loongson vector modes.
>  
>

Patch

diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 4568dea..4d12fb3 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -711,6 +711,33 @@ 
                                     (const_string "neon_mla_qqq_32_qqd_32_scalar")))))]
 )
 
+;; Fused multiply-accumulate
+(define_insn "fma<mode>4"
+  [(set (match_operand:VCVTF 0 "register_operand" "=w")
+        (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
+		 (match_operand:VCVTF 2 "register_operand" "w")
+		 (match_operand:VCVTF 3 "register_operand" "0")))]
+  "TARGET_NEON && TARGET_FMA"
+  "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+	(if_then_else (match_test "<Is_d_reg>")
+		      (const_string "neon_fp_vmla_ddd")
+		      (const_string "neon_fp_vmla_qqq")))]
+)
+
+(define_insn "*fmsub<mode>4"
+  [(set (match_operand:VCVTF 0 "register_operand" "=w")
+        (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
+		   (match_operand:VCVTF 2 "register_operand" "w")
+		   (match_operand:VCVTF 3 "register_operand" "0")))]
+  "TARGET_NEON && TARGET_FMA"
+  "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set (attr "neon_type")
+	(if_then_else (match_test "<Is_d_reg>")
+		      (const_string "neon_fp_vmla_ddd")
+		      (const_string "neon_fp_vmla_qqq")))]
+)
+
 (define_insn "ior<mode>3"
   [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
 	(ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
diff --git a/gcc/testsuite/gcc.target/arm/neon-vfma-1.c b/gcc/testsuite/gcc.target/arm/neon-vfma-1.c
new file mode 100644
index 0000000..a003a82
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vfma-1.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neonv2_ok } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-add-options arm_neonv2 } */
+/* { dg-final { scan-assembler "vfma\\.f32\[	\]+\[dDqQ]" } } */
+
+/* Verify that VFMA is used.  */
+void f1(int n, float a, float x[], float y[]) {
+  int i;
+  for (i = 0; i < n; ++i)
+    y[i] = a * x[i] + y[i];
+}
diff --git a/gcc/testsuite/gcc.target/arm/neon-vfms-1.c b/gcc/testsuite/gcc.target/arm/neon-vfms-1.c
new file mode 100644
index 0000000..8cefd8a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/neon-vfms-1.c
@@ -0,0 +1,12 @@ 
+/* { dg-do compile } */
+/* { dg-require-effective-target arm_neonv2_ok } */
+/* { dg-options "-O2 -ftree-vectorize -ffast-math" } */
+/* { dg-add-options arm_neonv2 } */
+/* { dg-final { scan-assembler "vfms\\.f32\[	\]+\[dDqQ]" } } */
+
+/* Verify that VFMS is used.  */
+void f1(int n, float a, float x[], float y[]) {
+  int i;
+  for (i = 0; i < n; ++i)
+    y[i] = a * -x[i] + y[i];
+}
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index bc5baa7..9fc8a5c 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -2082,6 +2082,19 @@  proc add_options_for_arm_neon { flags } {
     return "$flags $et_arm_neon_flags"
 }
 
+# Add the options needed for NEON.  We need either -mfloat-abi=softfp
+# or -mfloat-abi=hard, but if one is already specified by the
+# multilib, use it.  Similarly, if a -mfpu option already enables
+# NEON, do not add -mfpu=neon.
+
+proc add_options_for_arm_neonv2 { flags } {
+    if { ! [check_effective_target_arm_neonv2_ok] } {
+	return "$flags"
+    }
+    global et_arm_neonv2_flags
+    return "$flags $et_arm_neonv2_flags"
+}
+
 # Return 1 if this is an ARM target supporting -mfpu=neon
 # -mfloat-abi=softfp or equivalent options.  Some multilibs may be
 # incompatible with these options.  Also set et_arm_neon_flags to the
@@ -2110,6 +2123,38 @@  proc check_effective_target_arm_neon_ok { } {
 		check_effective_target_arm_neon_ok_nocache]
 }
 
+# Return 1 if this is an ARM target supporting -mfpu=neon-vfpv4
+# -mfloat-abi=softfp or equivalent options.  Some multilibs may be
+# incompatible with these options.  Also set et_arm_neonv2_flags to the
+# best options to add.
+
+proc check_effective_target_arm_neonv2_ok_nocache { } {
+    global et_arm_neonv2_flags
+    set et_arm_neonv2_flags ""
+    if { [check_effective_target_arm32] } {
+	foreach flags {"" "-mfloat-abi=softfp" "-mfpu=neon-vfpv4" "-mfpu=neon-vfpv4 -mfloat-abi=softfp"} {
+	    if { [check_no_compiler_messages_nocache arm_neonv2_ok object {
+		#include "arm_neon.h"
+		float32x2_t 
+		foo (float32x2_t a, float32x2_t b, float32x2_t c)
+                {
+                  return vfma_f32 (a, b, c);
+                }
+	    } "$flags"] } {
+		set et_arm_neonv2_flags $flags
+		return 1
+	    }
+	}
+    }
+
+    return 0
+}
+
+proc check_effective_target_arm_neonv2_ok { } {
+    return [check_cached_effective_target arm_neonv2_ok \
+		check_effective_target_arm_neonv2_ok_nocache]
+}
+
 # Add the options needed for NEON.  We need either -mfloat-abi=softfp
 # or -mfloat-abi=hard, but if one is already specified by the
 # multilib, use it.
@@ -2301,6 +2346,21 @@  proc check_effective_target_arm_neon_hw { } {
     } [add_options_for_arm_neon ""]]
 }
 
+proc check_effective_target_arm_neonv2_hw { } {
+    return [check_runtime arm_neon_hwv2_available {
+	#include "arm_neon.h"
+	int
+	main (void)
+	{
+	  float32x2_t a, b, c;
+	  asm ("vfma.f32 %P0, %P1, %P2"
+	       : "=w" (a)
+	       : "w" (b), "w" (c));
+	  return 0;
+	}
+    } [add_options_for_arm_neonv2 ""]]
+}
+
 # Return 1 if this is a ARM target with NEON enabled.
 
 proc check_effective_target_arm_neon { } {
@@ -2317,6 +2377,24 @@  proc check_effective_target_arm_neon { } {
     }
 }
 
+proc check_effective_target_arm_neonv2 { } {
+    if { [check_effective_target_arm32] } {
+	return [check_no_compiler_messages arm_neon object {
+	    #ifndef __ARM_NEON__
+	    #error not NEON
+	    #else
+	    #ifndef __ARM_FEATURE_FMA
+	    #error not NEONv2
+            #else
+	    int dummy;
+	    #endif
+	    #endif
+	}]
+    } else {
+	return 0
+    }
+}
+
 # Return 1 if this a Loongson-2E or -2F target using an ABI that supports
 # the Loongson vector modes.