Patchwork [RFA/ARM,1/3] Add VFP support for VFMA and friends

login
register
mail settings
Submitter Matthew Gretton-Dann
Date June 25, 2012, 2:59 p.m.
Message ID <4FE87CB4.7060103@arm.com>
Download mbox | patch
Permalink /patch/167133/
State New
Headers show

Comments

Matthew Gretton-Dann - June 25, 2012, 2:59 p.m.
All,

This patch adds support to the ARM backend for generating floating-point
fused multiply-accumulate.

OK?

gcc/ChangeLog:

2012-06-25  Matthew Gretton-Dann  <matthew.gretton-dann@arm.com>

	* config/arm/iterators.md (SDF): New mode iterator.
	(V_if_elem): Add support for SF and DF modes.
	(V_reg): Likewise.
	(F_w_constraint): New mode iterator attribute.
	(F_r_constraint): Likewise.
	(F_fma_type): Likewise.
	(F_target): Likewise.
	config/arm/vfp.md (fma<mode>4): New pattern.
	(*fmsub<mode>4): Likewise.
	(*fmnsub<mode>4): Likewise.
	(*fmnadd<mode>4): Likewise.

Thanks,

Matt
Richard Earnshaw - June 26, 2012, 1:44 p.m.
On 25/06/12 15:59, Matthew Gretton-Dann wrote:
> All,
> 
> This patch adds support to the ARM backend for generating floating-point
> fused multiply-accumulate.
> 
> OK?
> 
> gcc/ChangeLog:
> 
> 2012-06-25  Matthew Gretton-Dann  <matthew.gretton-dann@arm.com>
> 
> 	* config/arm/iterators.md (SDF): New mode iterator.
> 	(V_if_elem): Add support for SF and DF modes.
> 	(V_reg): Likewise.
> 	(F_w_constraint): New mode iterator attribute.
> 	(F_r_constraint): Likewise.
> 	(F_fma_type): Likewise.
> 	(F_target): Likewise.
> 	config/arm/vfp.md (fma<mode>4): New pattern.
> 	(*fmsub<mode>4): Likewise.
> 	(*fmnsub<mode>4): Likewise.
> 	(*fmnadd<mode>4): Likewise.
> 

F_target as an attribute name doesn't tell me anything useful.  I
suggest F_maybe_not_df.

> +  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA <F_target>"

This should be written as

   "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA && <F_maybe_not_df>"

Then the attribute should expand

  (define_mode_attr F_maybe_not_df [(SF "1") (DF "TARGET_VFP_DOUBLE")])

As I style nit, I would also suggest using the iterator name when it
appears in the pattern name, even though it is redundant.  This avoids
potential ambiguities when there are multiple iterators operating on
different expansions.  That is, instead of:

 (define_insn "fma<mode>4"

use:

 (define_insn "fma<SDF:mode>4"

OK with those changes.

R.

> Thanks,
> 
> Matt
> 
> 
> 0001-Add-VFP-support-for-VFMA-and-friends.txt
> 
> 
> diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> index 795a5ee..3063f00 100644
> --- a/gcc/config/arm/iterators.md
> +++ b/gcc/config/arm/iterators.md
> @@ -42,6 +42,9 @@
>  ;; A list of the 32bit and 64bit integer modes
>  (define_mode_iterator SIDI [SI DI])
>  
> +;; A list of modes which the VFP unit can handle
> +(define_mode_iterator SDF [SF DF])
> +
>  ;; Integer element sizes implemented by IWMMXT.
>  (define_mode_iterator VMMX [V2SI V4HI V8QI])
>  
> @@ -245,7 +248,8 @@
>                           (V4HI "P") (V8HI  "q")
>                           (V2SI "P") (V4SI  "q")
>                           (V2SF "P") (V4SF  "q")
> -                         (DI   "P") (V2DI  "q")])
> +                         (DI   "P") (V2DI  "q")
> +			 (SF   "")  (DF    "P")])
>  
>  ;; Wider modes with the same number of elements.
>  (define_mode_attr V_widen [(V8QI "V8HI") (V4HI "V4SI") (V2SI "V2DI")])
> @@ -303,7 +307,8 @@
>                   (V4HI "i16") (V8HI  "i16")
>                               (V2SI "i32") (V4SI  "i32")
>                               (DI   "i64") (V2DI  "i64")
> -                 (V2SF "f32") (V4SF  "f32")])
> +                 (V2SF "f32") (V4SF  "f32")
> +		 (SF "f32") (DF "f64")])
>  
>  ;; Same, but for operations which work on signed values.
>  (define_mode_attr V_s_elem [(V8QI "s8")  (V16QI "s8")
> @@ -423,6 +428,12 @@
>  ;; Mode attribute for vshll.
>  (define_mode_attr V_innermode [(V8QI "QI") (V4HI "HI") (V2SI "SI")])
>  
> +;; Mode attributes used for fused-multiply-accumulate VFP support
> +(define_mode_attr F_w_constraint [(SF "=t") (DF "=w")])
> +(define_mode_attr F_r_constraint [(SF "t") (DF "w")])
> +(define_mode_attr F_fma_type [(SF "fmacs") (DF "fmacd")])
> +(define_mode_attr F_target [(SF "") (DF "&& TARGET_VFP_DOUBLE")])
> +
>  ;;----------------------------------------------------------------------------
>  ;; Code attributes
>  ;;----------------------------------------------------------------------------
> diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
> index 2061414..2a50353 100644
> --- a/gcc/config/arm/vfp.md
> +++ b/gcc/config/arm/vfp.md
> @@ -890,6 +890,54 @@
>     (set_attr "type" "fmacd")]
>  )
>  
> +;; Fused-multiply-accumulate
> +
> +(define_insn "fma<mode>4"
> +  [(set (match_operand:SDF 0 "register_operand" "<F_w_constraint>")
> +        (fma:SDF (match_operand:SDF 1 "register_operand" "<F_r_constraint>")
> +		 (match_operand:SDF 2 "register_operand" "<F_r_constraint>")
> +		 (match_operand:SDF 3 "register_operand" "0")))]
> +  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA <F_target>"
> +  "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
> +  [(set_attr "predicable" "yes")
> +   (set_attr "type" "<F_fma_type>")]
> +)
> +
> +(define_insn "*fmsub<mode>4"
> +  [(set (match_operand:SDF 0 "register_operand" "<F_w_constraint>")
> +	(fma:SDF (neg:SDF (match_operand:SDF 1 "register_operand" 
> +					     "<F_r_constraint>"))
> +		 (match_operand:SDF 2 "register_operand" "<F_r_constraint>")
> +		 (match_operand:SDF 3 "register_operand" "0")))]
> +  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA <F_target>"
> +  "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
> +  [(set_attr "predicable" "yes")
> +   (set_attr "type" "<F_fma_type>")]
> +)
> +
> +(define_insn "*fnmsub<mode>4"
> +  [(set (match_operand:SDF 0 "register_operand" "<F_w_constraint>")
> +	(fma:SDF (match_operand:SDF 1 "register_operand"  "<F_r_constraint>")
> +		 (match_operand:SDF 2 "register_operand" "<F_r_constraint>")
> +		 (neg:SDF (match_operand:SDF 3 "register_operand" "0"))))]
> +  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA <F_target>"
> +  "vfnms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
> +  [(set_attr "predicable" "yes")
> +   (set_attr "type" "<F_fma_type>")]
> +)
> +
> +(define_insn "*fnmadd<mode>4"
> +  [(set (match_operand:SDF 0 "register_operand" "<F_w_constraint>")
> +	(fma:SDF (neg:SDF (match_operand:SDF 1 "register_operand"  
> +					       "<F_r_constraint>"))
> +		 (match_operand:SDF 2 "register_operand" "<F_r_constraint>")
> +		 (neg:SDF (match_operand:SDF 3 "register_operand" "0"))))]
> +  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA <F_target>"
> +  "vfnma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
> +  [(set_attr "predicable" "yes")
> +   (set_attr "type" "<F_fma_type>")]
> +)
> +
>  
>  ;; Conversion routines
>  
> diff --git a/gcc/testsuite/gcc.target/arm/fma-sp.c b/gcc/testsuite/gcc.target/arm/fma-sp.c
> new file mode 100644
> index 0000000..457b55c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/fma-sp.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mcpu=cortex-m4 -mfpu=fpv4-sp-d16 -mthumb" } */
> +
> +#include "fma.h"
> +
> +/* { dg-final { scan-assembler-not "vfma\.f64\td\[0-9\]" } } */
> +/* { dg-final { scan-assembler-times "vfma\.f32\ts\[0-9\]" 1 } } */
> +/* { dg-final { scan-assembler-not "vfms\.f64\td\[0-9\]" } } */
> +/* { dg-final { scan-assembler-times "vfms\.f32\ts\[0-9\]" 1 } } */
> +/* { dg-final { scan-assembler-not "vfnma\.f64\td\[0-9\]" } } */
> +/* { dg-final { scan-assembler-times "vfnma\.f32\ts\[0-9\]" 1 } } */
> +/* { dg-final { scan-assembler-not "vfnms\.f64\td\[0-9\]" } } */
> +/* { dg-final { scan-assembler-times "vfnms\.f32\ts\[0-9\]" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/arm/fma.c b/gcc/testsuite/gcc.target/arm/fma.c
> new file mode 100644
> index 0000000..1809aa8
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/fma.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -mcpu=cortex-a15 -mfpu=vfpv4" } */
> +
> +#include "fma.h"
> +
> +/* { dg-final { scan-assembler-times "vfma\.f64\td\[0-9\]" 1 } } */
> +/* { dg-final { scan-assembler-times "vfma\.f32\ts\[0-9\]" 1 } } */
> +/* { dg-final { scan-assembler-times "vfms\.f64\td\[0-9\]" 1 } } */
> +/* { dg-final { scan-assembler-times "vfms\.f32\ts\[0-9\]" 1 } } */
> +/* { dg-final { scan-assembler-times "vfnma\.f64\td\[0-9\]" 1 } } */
> +/* { dg-final { scan-assembler-times "vfnma\.f32\ts\[0-9\]" 1 } } */
> +/* { dg-final { scan-assembler-times "vfnms\.f64\td\[0-9\]" 1 } } */
> +/* { dg-final { scan-assembler-times "vfnms\.f32\ts\[0-9\]" 1 } } */
> diff --git a/gcc/testsuite/gcc.target/arm/fma.h b/gcc/testsuite/gcc.target/arm/fma.h
> new file mode 100644
> index 0000000..0812c2d
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/fma.h
> @@ -0,0 +1,50 @@
> +extern double fma (double, double, double);
> +extern float fmaf (float, float, float);
> +
> +float
> +vfma32 (float x, float y, float z)
> +{
> +  return fmaf (x, y, z);
> +}
> +
> +float
> +vfms32 (float x, float y, float z)
> +{
> +  return fmaf (-x, y, z);
> +}
> +
> +float
> +vfnms32 (float x, float y, float z)
> +{
> +  return fmaf (x, y, -z);
> +}
> +
> +float
> +vfnma32 (float x, float y, float z)
> +{
> +  return fmaf (-x, y, -z);
> +}
> +
> +double
> +vfma64 (double x, double y, double z)
> +{
> +  return fma (x, y, z);
> +}
> +
> +double
> +vfms64 (double x, double y, double z)
> +{
> +  return fma (-x, y, z);
> +}
> +
> +double
> +vfnms64 (double x, double y, double z)
> +{
> +  return fma (x, y, -z);
> +}
> +
> +double
> +vfnma64 (double x, double y, double z)
> +{
> +  return fma (-x, y, -z);
> +}
>

Patch

diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 795a5ee..3063f00 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -42,6 +42,9 @@ 
 ;; A list of the 32bit and 64bit integer modes
 (define_mode_iterator SIDI [SI DI])
 
+;; A list of modes which the VFP unit can handle
+(define_mode_iterator SDF [SF DF])
+
 ;; Integer element sizes implemented by IWMMXT.
 (define_mode_iterator VMMX [V2SI V4HI V8QI])
 
@@ -245,7 +248,8 @@ 
                          (V4HI "P") (V8HI  "q")
                          (V2SI "P") (V4SI  "q")
                          (V2SF "P") (V4SF  "q")
-                         (DI   "P") (V2DI  "q")])
+                         (DI   "P") (V2DI  "q")
+			 (SF   "")  (DF    "P")])
 
 ;; Wider modes with the same number of elements.
 (define_mode_attr V_widen [(V8QI "V8HI") (V4HI "V4SI") (V2SI "V2DI")])
@@ -303,7 +307,8 @@ 
                  (V4HI "i16") (V8HI  "i16")
                              (V2SI "i32") (V4SI  "i32")
                              (DI   "i64") (V2DI  "i64")
-                 (V2SF "f32") (V4SF  "f32")])
+                 (V2SF "f32") (V4SF  "f32")
+		 (SF "f32") (DF "f64")])
 
 ;; Same, but for operations which work on signed values.
 (define_mode_attr V_s_elem [(V8QI "s8")  (V16QI "s8")
@@ -423,6 +428,12 @@ 
 ;; Mode attribute for vshll.
 (define_mode_attr V_innermode [(V8QI "QI") (V4HI "HI") (V2SI "SI")])
 
+;; Mode attributes used for fused-multiply-accumulate VFP support
+(define_mode_attr F_w_constraint [(SF "=t") (DF "=w")])
+(define_mode_attr F_r_constraint [(SF "t") (DF "w")])
+(define_mode_attr F_fma_type [(SF "fmacs") (DF "fmacd")])
+(define_mode_attr F_target [(SF "") (DF "&& TARGET_VFP_DOUBLE")])
+
 ;;----------------------------------------------------------------------------
 ;; Code attributes
 ;;----------------------------------------------------------------------------
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index 2061414..2a50353 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -890,6 +890,54 @@ 
    (set_attr "type" "fmacd")]
 )
 
+;; Fused-multiply-accumulate
+
+(define_insn "fma<mode>4"
+  [(set (match_operand:SDF 0 "register_operand" "<F_w_constraint>")
+        (fma:SDF (match_operand:SDF 1 "register_operand" "<F_r_constraint>")
+		 (match_operand:SDF 2 "register_operand" "<F_r_constraint>")
+		 (match_operand:SDF 3 "register_operand" "0")))]
+  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA <F_target>"
+  "vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "predicable" "yes")
+   (set_attr "type" "<F_fma_type>")]
+)
+
+(define_insn "*fmsub<mode>4"
+  [(set (match_operand:SDF 0 "register_operand" "<F_w_constraint>")
+	(fma:SDF (neg:SDF (match_operand:SDF 1 "register_operand" 
+					     "<F_r_constraint>"))
+		 (match_operand:SDF 2 "register_operand" "<F_r_constraint>")
+		 (match_operand:SDF 3 "register_operand" "0")))]
+  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA <F_target>"
+  "vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "predicable" "yes")
+   (set_attr "type" "<F_fma_type>")]
+)
+
+(define_insn "*fnmsub<mode>4"
+  [(set (match_operand:SDF 0 "register_operand" "<F_w_constraint>")
+	(fma:SDF (match_operand:SDF 1 "register_operand"  "<F_r_constraint>")
+		 (match_operand:SDF 2 "register_operand" "<F_r_constraint>")
+		 (neg:SDF (match_operand:SDF 3 "register_operand" "0"))))]
+  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA <F_target>"
+  "vfnms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "predicable" "yes")
+   (set_attr "type" "<F_fma_type>")]
+)
+
+(define_insn "*fnmadd<mode>4"
+  [(set (match_operand:SDF 0 "register_operand" "<F_w_constraint>")
+	(fma:SDF (neg:SDF (match_operand:SDF 1 "register_operand"  
+					       "<F_r_constraint>"))
+		 (match_operand:SDF 2 "register_operand" "<F_r_constraint>")
+		 (neg:SDF (match_operand:SDF 3 "register_operand" "0"))))]
+  "TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA <F_target>"
+  "vfnma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
+  [(set_attr "predicable" "yes")
+   (set_attr "type" "<F_fma_type>")]
+)
+
 
 ;; Conversion routines
 
diff --git a/gcc/testsuite/gcc.target/arm/fma-sp.c b/gcc/testsuite/gcc.target/arm/fma-sp.c
new file mode 100644
index 0000000..457b55c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/fma-sp.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=cortex-m4 -mfpu=fpv4-sp-d16 -mthumb" } */
+
+#include "fma.h"
+
+/* { dg-final { scan-assembler-not "vfma\.f64\td\[0-9\]" } } */
+/* { dg-final { scan-assembler-times "vfma\.f32\ts\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-not "vfms\.f64\td\[0-9\]" } } */
+/* { dg-final { scan-assembler-times "vfms\.f32\ts\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-not "vfnma\.f64\td\[0-9\]" } } */
+/* { dg-final { scan-assembler-times "vfnma\.f32\ts\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-not "vfnms\.f64\td\[0-9\]" } } */
+/* { dg-final { scan-assembler-times "vfnms\.f32\ts\[0-9\]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/fma.c b/gcc/testsuite/gcc.target/arm/fma.c
new file mode 100644
index 0000000..1809aa8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/fma.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O2 -mcpu=cortex-a15 -mfpu=vfpv4" } */
+
+#include "fma.h"
+
+/* { dg-final { scan-assembler-times "vfma\.f64\td\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfma\.f32\ts\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfms\.f64\td\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfms\.f32\ts\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnma\.f64\td\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnma\.f32\ts\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnms\.f64\td\[0-9\]" 1 } } */
+/* { dg-final { scan-assembler-times "vfnms\.f32\ts\[0-9\]" 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/fma.h b/gcc/testsuite/gcc.target/arm/fma.h
new file mode 100644
index 0000000..0812c2d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/fma.h
@@ -0,0 +1,50 @@ 
+extern double fma (double, double, double);
+extern float fmaf (float, float, float);
+
+float
+vfma32 (float x, float y, float z)
+{
+  return fmaf (x, y, z);
+}
+
+float
+vfms32 (float x, float y, float z)
+{
+  return fmaf (-x, y, z);
+}
+
+float
+vfnms32 (float x, float y, float z)
+{
+  return fmaf (x, y, -z);
+}
+
+float
+vfnma32 (float x, float y, float z)
+{
+  return fmaf (-x, y, -z);
+}
+
+double
+vfma64 (double x, double y, double z)
+{
+  return fma (x, y, z);
+}
+
+double
+vfms64 (double x, double y, double z)
+{
+  return fma (-x, y, z);
+}
+
+double
+vfnms64 (double x, double y, double z)
+{
+  return fma (x, y, -z);
+}
+
+double
+vfnma64 (double x, double y, double z)
+{
+  return fma (-x, y, -z);
+}