Patchwork [SH] PR 53511 - Add support for fma patterns

login
register
mail settings
Submitter Oleg Endo
Date June 10, 2012, 2:33 p.m.
Message ID <1339338839.2300.14.camel@yam-132-YW-E178-FTW>
Download mbox | patch
Permalink /patch/164002/
State New
Headers show

Comments

Oleg Endo - June 10, 2012, 2:33 p.m.
Hello,

The attached patch adds support for the fmasf4 pattern on SH.
Tested against rev 188280 with 

make info dvi pdf

make -k check RUNTESTFLAGS="--target_board=sh-sim
\{-m2/-ml,-m2/-mb,-m2a/-mb,-m2a-single/-mb,-m4/-ml,-m4/-mb,
-m4-single/-ml,-m4-single/-mb,-m4a-single/-ml,-m4a-single/-mb}"

and no new failures, except for the recently added PR 51340 test cases.
I'd like to remove those in another patch because they don't make much
sense anymore after this patch.

Cheers,
Oleg

ChangeLog:

	PR target/53511
	* config/sh/sh.md (fmasf4): New expander.
	(*macsf3): Rename to fmasf4_i.  Adapt to fma pattern.
	(mac_media): Rename to fmasf4_media.  Adapt to fma pattern.
	* config/sh/sh.opt (mfused-madd): Remove.
	* config/sh/sh.c (sh_option_override): Remove mfused-madd
	handling.
	(builtin_description bdesc): Remove __builtin_sh_media_FMAC_S.
	* config.gcc (sh[123456789lbe]*-*-* | sh-*-*): Add
	fused-madd.opt to extra_options.
	* doc/invoke.texi (SH Options): Update mfused-madd and
	mno-fused-madd descriptions.
	
testsuite/ChangeLog:
	
	PR target/53511
	* gcc.target/sh/pr53511-1.c: New.
Kaz Kojima - June 11, 2012, 1:28 a.m.
Oleg Endo <oleg.endo@t-online.de> wrote:
> The attached patch adds support for the fmasf4 pattern on SH.
> Tested against rev 188280 with 
> 
> make info dvi pdf
> 
> make -k check RUNTESTFLAGS="--target_board=sh-sim
> \{-m2/-ml,-m2/-mb,-m2a/-mb,-m2a-single/-mb,-m4/-ml,-m4/-mb,
> -m4-single/-ml,-m4-single/-mb,-m4a-single/-ml,-m4a-single/-mb}"
> 
> and no new failures, except for the recently added PR 51340 test cases.
> I'd like to remove those in another patch because they don't make much
> sense anymore after this patch.

OK.

Regards,
	kaz

Patch

Index: gcc/config/sh/sh.md
===================================================================
--- gcc/config/sh/sh.md	(revision 188280)
+++ gcc/config/sh/sh.md	(working copy)
@@ -10329,6 +10329,9 @@ 
   "fmul.s	%1, %2, %0"
   [(set_attr "type" "fparith_media")])
 
+;; FIXME: The special handling below, which exists only to help the
+;;	  combine pass form fmac insns, is obsolete now that the FMA patterns
+;;	  are used; those patterns do not depend on the combine pass.
 ;; Unfortunately, the combiner is unable to cope with the USE of the FPSCR
 ;; register in feeding fp instructions.  Thus, in order to generate fmac,
 ;; we start out with a mulsf pattern that does not depend on fpscr.
@@ -10359,26 +10362,42 @@ 
   [(set_attr "type" "fp")
    (set_attr "fp_mode" "single")])
 
-(define_insn "mac_media"
-  [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
-	(plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%f")
-			  (match_operand:SF 2 "fp_arith_reg_operand" "f"))
-		 (match_operand:SF 3 "fp_arith_reg_operand" "0")))]
-  "TARGET_SHMEDIA_FPU && TARGET_FMAC"
-  "fmac.s %1, %2, %0"
-  [(set_attr "type" "fparith_media")])
+;; FMA (fused multiply-add) patterns
+(define_expand "fmasf4"
+  [(set (match_operand:SF 0 "fp_arith_reg_operand" "")
+	(fma:SF (match_operand:SF 1 "fp_arith_reg_operand" "")
+		(match_operand:SF 2 "fp_arith_reg_operand" "")
+		(match_operand:SF 3 "fp_arith_reg_operand" "")))]
+  "TARGET_SH2E || TARGET_SHMEDIA_FPU"
+{
+  if (TARGET_SH2E)
+    {
+      emit_sf_insn (gen_fmasf4_i (operands[0], operands[1], operands[2],
+				  operands[3], get_fpscr_rtx ()));
+      DONE;
+    }
+})
 
-(define_insn "*macsf3"
+(define_insn "fmasf4_i"
   [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
-	(plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%w")
-			  (match_operand:SF 2 "fp_arith_reg_operand" "f"))
-		 (match_operand:SF 3 "arith_reg_operand" "0")))
+	(fma:SF (match_operand:SF 1 "fp_arith_reg_operand" "w")
+		(match_operand:SF 2 "fp_arith_reg_operand" "f")
+		(match_operand:SF 3 "fp_arith_reg_operand" "0")))
    (use (match_operand:PSI 4 "fpscr_operand" "c"))]
-  "TARGET_SH2E && TARGET_FMAC"
-  "fmac	fr0,%2,%0"
+  "TARGET_SH2E"
+  "fmac	%1,%2,%0"
   [(set_attr "type" "fp")
    (set_attr "fp_mode" "single")])
 
+(define_insn "fmasf4_media"
+  [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+	(fma:SF (match_operand:SF 1 "fp_arith_reg_operand" "f")
+		(match_operand:SF 2 "fp_arith_reg_operand" "f")
+		(match_operand:SF 3 "fp_arith_reg_operand" "0")))]
+  "TARGET_SHMEDIA_FPU"
+  "fmac.s %1, %2, %0"
+  [(set_attr "type" "fparith_media")])
+
 (define_expand "divsf3"
   [(set (match_operand:SF 0 "arith_reg_operand" "")
 	(div:SF (match_operand:SF 1 "arith_reg_operand" "")
Index: gcc/config/sh/sh.opt
===================================================================
--- gcc/config/sh/sh.opt	(revision 188280)
+++ gcc/config/sh/sh.opt	(working copy)
@@ -257,10 +257,6 @@ 
 Target RejectNegative Joined Var(sh_fixed_range_str)
 Specify range of registers to make fixed
 
-mfused-madd
-Target Var(TARGET_FMAC)
-Enable the use of the fused floating point multiply-accumulate operation
-
 mgettrcost=
 Target RejectNegative Joined UInteger Var(sh_gettrcost) Init(-1)
 Cost to assume for gettr insn
Index: gcc/config/sh/sh.c
===================================================================
--- gcc/config/sh/sh.c	(revision 188280)
+++ gcc/config/sh/sh.c	(working copy)
@@ -878,13 +878,6 @@ 
 
   if (flag_unsafe_math_optimizations)
     {
-      /* Enable fmac insn for "a * b + c" SFmode calculations when -ffast-math
-	 is enabled and -mno-fused-madd is not specified by the user.
-	 The fmac insn can't be enabled by default due to the implied
-	 FMA semantics.   See also PR target/29100.  */
-      if (global_options_set.x_TARGET_FMAC == 0)
-	TARGET_FMAC = 1;
-
       /* Enable fsca insn for SH4A if not otherwise specified by the user.  */
       if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
 	TARGET_FSCA = 1;
@@ -11231,7 +11224,6 @@ 
   { CODE_FOR_fsina_s,	"__builtin_sh_media_FSINA_S", SH_BLTIN_SISF, 0 },
   { CODE_FOR_fipr,	"__builtin_sh_media_FIPR_S", SH_BLTIN_3, 0 },
   { CODE_FOR_ftrv,	"__builtin_sh_media_FTRV_S", SH_BLTIN_3, 0 },
-  { CODE_FOR_mac_media,	"__builtin_sh_media_FMAC_S", SH_BLTIN_3, 0 },
   { CODE_FOR_sqrtdf2,	"__builtin_sh_media_FSQRT_D", SH_BLTIN_2, 0 },
   { CODE_FOR_sqrtsf2,	"__builtin_sh_media_FSQRT_S", SH_BLTIN_2, 0 },
   { CODE_FOR_fsrra_s,	"__builtin_sh_media_FSRRA_S", SH_BLTIN_2, 0 },
Index: gcc/doc/invoke.texi
===================================================================
--- gcc/doc/invoke.texi	(revision 188280)
+++ gcc/doc/invoke.texi	(working copy)
@@ -18307,14 +18307,12 @@ 
 @itemx -mno-fused-madd
 @opindex mfused-madd
 @opindex mno-fused-madd
-If the processor type supports it, setting @code{-mfused-madd} will allow the
-usage of the @code{fmac} instruction (floating-point multiply-accumulate) for
-regular calculations.  Enabling this option might generate faster code but also
-produce different numeric floating-point results compared to strict IEEE 754
-arithmetic.  @code{-mfused-madd} is enabled by default by option
-@option{-funsafe-math-optimizations}.  Setting @code{-mno-fused-madd} will
-disallow the usage of the @code{fmac} instruction for regular calculations
-even if @option{-funsafe-math-optimizations} is in effect.
+Generate code that uses (does not use) the floating-point multiply and
+accumulate instructions.  These instructions are generated by default
+if hardware floating point is used.  The machine-dependent
+@option{-mfused-madd} option is now mapped to the machine-independent
+@option{-ffp-contract=fast} option, and @option{-mno-fused-madd} is
+mapped to @option{-ffp-contract=off}.
 
 @item -mfsca
 @itemx -mno-fsca
Index: gcc/testsuite/gcc.target/sh/pr53511-1.c
===================================================================
--- gcc/testsuite/gcc.target/sh/pr53511-1.c	(revision 0)
+++ gcc/testsuite/gcc.target/sh/pr53511-1.c	(revision 0)
@@ -0,0 +1,14 @@ 
+/* Verify that the fmac insn is used for the standard fmaf function.  */
+/* { dg-do compile { target "sh*-*-*" } } */
+/* { dg-options "-O1" } */
+/* { dg-skip-if "" { "sh*-*-*" } { "-m1" "-m2*" "-m4al" "*nofpu" "-m4-340*" "-m4-400*" "-m4-500*" "-m5*" } { "" } }  */
+/* { dg-final { scan-assembler "fmac" } } */
+
+#include <math.h>
+
+float
+test_func_00 (float a, float b, float c)
+{
+  return fmaf (a, b, c);
+}
+
Index: gcc/config.gcc
===================================================================
--- gcc/config.gcc	(revision 188280)
+++ gcc/config.gcc	(working copy)
@@ -449,6 +449,7 @@ 
 sh[123456789lbe]*-*-* | sh-*-*)
 	cpu_type=sh
 	need_64bit_hwint=yes
+	extra_options="${extra_options} fused-madd.opt"
 	;;
 v850*-*-*)
 	cpu_type=v850