Patchwork [6/N,spu] convert to fma

login
register
mail settings
Submitter Richard Henderson
Date Nov. 12, 2010, 2:05 a.m.
Message ID <20101112020534.GA12839@twiddle.net>
Download mbox | patch
Permalink /patch/70910/
State New
Headers show

Comments

Richard Henderson - Nov. 12, 2010, 2:05 a.m.
This port wasn't using -mfused-madd, so this patch doesn't add that
option as a deprecated flag.  I do convert the existing fma insns to
use the fma opcode.

There is one possible problem: the description of the fnms insn
does not match what I would expect from IBM, nor does it match
the dfnms insn.  It's the latter point that concerns me most.

The patch builds a cross-compiler, but is otherwise untested.


r~

Patch

diff --git a/gcc/config/spu/spu-builtins.def b/gcc/config/spu/spu-builtins.def
index 9e92781..0687707 100644
--- a/gcc/config/spu/spu-builtins.def
+++ b/gcc/config/spu/spu-builtins.def
@@ -171,13 +171,13 @@  DEF_BUILTIN (SI_FS,          CODE_FOR_subv4sf3,      "si_fs",          B_INSN,
 DEF_BUILTIN (SI_DFS,         CODE_FOR_subv2df3,      "si_dfs",         B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
 DEF_BUILTIN (SI_FM,          CODE_FOR_mulv4sf3,      "si_fm",          B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
 DEF_BUILTIN (SI_DFM,         CODE_FOR_mulv2df3,      "si_dfm",         B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
-DEF_BUILTIN (SI_FMA,         CODE_FOR_fma_v4sf,      "si_fma",         B_INSN,   _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
-DEF_BUILTIN (SI_DFMA,        CODE_FOR_fma_v2df,      "si_dfma",        B_INSN,   _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
-DEF_BUILTIN (SI_DFNMA,       CODE_FOR_fnma_v2df,     "si_dfnma",       B_INSN,   _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
-DEF_BUILTIN (SI_FNMS,        CODE_FOR_fnms_v4sf,     "si_fnms",        B_INSN,   _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
-DEF_BUILTIN (SI_DFNMS,       CODE_FOR_fnms_v2df,     "si_dfnms",       B_INSN,   _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
-DEF_BUILTIN (SI_FMS,         CODE_FOR_fms_v4sf,      "si_fms",         B_INSN,   _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
-DEF_BUILTIN (SI_DFMS,        CODE_FOR_fms_v2df,      "si_dfms",        B_INSN,   _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_FMA,         CODE_FOR_fmav4sf4,      "si_fma",         B_INSN,   _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFMA,        CODE_FOR_fmav2df4,      "si_dfma",        B_INSN,   _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFNMA,       CODE_FOR_nfmav2df4,     "si_dfnma",       B_INSN,   _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_FNMS,        CODE_FOR_fnmav4sf4,     "si_fnms",        B_INSN,   _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFNMS,       CODE_FOR_nfmsv2df4,     "si_dfnms",       B_INSN,   _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_FMS,         CODE_FOR_fmsv4sf4,      "si_fms",         B_INSN,   _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
+DEF_BUILTIN (SI_DFMS,        CODE_FOR_fmsv2df4,      "si_dfms",        B_INSN,   _A4(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
 DEF_BUILTIN (SI_FREST,       CODE_FOR_frest_v4sf,    "si_frest",       B_INSN,   _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
 DEF_BUILTIN (SI_FRSQEST,     CODE_FOR_frsqest_v4sf,  "si_frsqest",     B_INSN,   _A2(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
 DEF_BUILTIN (SI_FI,          CODE_FOR_fi_v4sf,       "si_fi",          B_INSN,   _A3(SPU_BTI_QUADWORD, SPU_BTI_QUADWORD, SPU_BTI_QUADWORD))
@@ -242,7 +242,7 @@  DEF_BUILTIN (SPU_MULH,       CODE_FOR_spu_mpyh,       "spu_mulh",       B_INSN,
 DEF_BUILTIN (SPU_MULSR,      CODE_FOR_spu_mpys,       "spu_mulsr",      B_INSN,     _A3(SPU_BTI_V4SI,     SPU_BTI_V8HI,   SPU_BTI_V8HI))
 DEF_BUILTIN (SPU_FREST,      CODE_FOR_frest_v4sf,     "spu_frest",      B_INSN,     _A2(SPU_BTI_V4SF,     SPU_BTI_V4SF))
 DEF_BUILTIN (SPU_FRSQEST,    CODE_FOR_frsqest_v4sf,   "spu_frsqest",    B_INSN,     _A2(SPU_BTI_V4SF,     SPU_BTI_V4SF))
-DEF_BUILTIN (SPU_NMADD,      CODE_FOR_fnma_v2df,      "spu_nmadd",      B_INSN,     _A4(SPU_BTI_V2DF,     SPU_BTI_V2DF,   SPU_BTI_V2DF,   SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_NMADD,      CODE_FOR_nfmav2df4,      "spu_nmadd",      B_INSN,     _A4(SPU_BTI_V2DF,     SPU_BTI_V2DF,   SPU_BTI_V2DF,   SPU_BTI_V2DF))
 DEF_BUILTIN (SPU_ABSD,       CODE_FOR_spu_absdb,      "spu_absd",       B_INSN,     _A3(SPU_BTI_UV16QI,   SPU_BTI_UV16QI, SPU_BTI_UV16QI))
 DEF_BUILTIN (SPU_AVG,        CODE_FOR_spu_avgb,       "spu_avg",        B_INSN,     _A3(SPU_BTI_UV16QI,   SPU_BTI_UV16QI, SPU_BTI_UV16QI))
 DEF_BUILTIN (SPU_SUMB,       CODE_FOR_spu_sumb,       "spu_sumb",       B_INSN,     _A3(SPU_BTI_UV8HI,    SPU_BTI_UV16QI, SPU_BTI_UV16QI))
@@ -286,11 +286,11 @@  DEF_BUILTIN (SPU_GENCX_0,          CODE_FOR_cgx_v4si,      "spu_gencx_0",
 DEF_BUILTIN (SPU_GENCX_1,          CODE_FOR_cgx_v4si,      "spu_gencx_1",          B_INTERNAL, _A4(SPU_BTI_UV4SI,  SPU_BTI_UV4SI,  SPU_BTI_UV4SI,  SPU_BTI_UV4SI))
 DEF_BUILTIN (SPU_MADD,             CODE_FOR_nothing,       "spu_madd",             B_OVERLOAD, _A1(SPU_BTI_VOID))
 DEF_BUILTIN (SPU_MADD_0,           CODE_FOR_spu_mpya,      "spu_madd_0",           B_INTERNAL, _A4(SPU_BTI_V4SI,   SPU_BTI_V8HI,   SPU_BTI_V8HI,   SPU_BTI_V4SI))
-DEF_BUILTIN (SPU_MADD_1,           CODE_FOR_fma_v4sf,      "spu_madd_1",           B_INTERNAL, _A4(SPU_BTI_V4SF,   SPU_BTI_V4SF,   SPU_BTI_V4SF,   SPU_BTI_V4SF))
-DEF_BUILTIN (SPU_MADD_2,           CODE_FOR_fma_v2df,      "spu_madd_2",           B_INTERNAL, _A4(SPU_BTI_V2DF,   SPU_BTI_V2DF,   SPU_BTI_V2DF,   SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_MADD_1,           CODE_FOR_fmav4sf4,      "spu_madd_1",           B_INTERNAL, _A4(SPU_BTI_V4SF,   SPU_BTI_V4SF,   SPU_BTI_V4SF,   SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_MADD_2,           CODE_FOR_fmav2df4,      "spu_madd_2",           B_INTERNAL, _A4(SPU_BTI_V2DF,   SPU_BTI_V2DF,   SPU_BTI_V2DF,   SPU_BTI_V2DF))
 DEF_BUILTIN (SPU_MSUB,             CODE_FOR_nothing,       "spu_msub",             B_OVERLOAD, _A1(SPU_BTI_VOID))
-DEF_BUILTIN (SPU_MSUB_0,           CODE_FOR_fms_v4sf,      "spu_msub_0",           B_INTERNAL, _A4(SPU_BTI_V4SF,   SPU_BTI_V4SF,   SPU_BTI_V4SF,   SPU_BTI_V4SF))
-DEF_BUILTIN (SPU_MSUB_1,           CODE_FOR_fms_v2df,      "spu_msub_1",           B_INTERNAL, _A4(SPU_BTI_V2DF,   SPU_BTI_V2DF,   SPU_BTI_V2DF,   SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_MSUB_0,           CODE_FOR_fmsv4sf4,      "spu_msub_0",           B_INTERNAL, _A4(SPU_BTI_V4SF,   SPU_BTI_V4SF,   SPU_BTI_V4SF,   SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_MSUB_1,           CODE_FOR_fmsv2df4,      "spu_msub_1",           B_INTERNAL, _A4(SPU_BTI_V2DF,   SPU_BTI_V2DF,   SPU_BTI_V2DF,   SPU_BTI_V2DF))
 DEF_BUILTIN (SPU_MHHADD,           CODE_FOR_nothing,       "spu_mhhadd",           B_OVERLOAD, _A1(SPU_BTI_VOID))
 DEF_BUILTIN (SPU_MHHADD_0,         CODE_FOR_spu_mpyhhau,   "spu_mhhadd_0",         B_INTERNAL, _A4(SPU_BTI_UV4SI,  SPU_BTI_UV8HI,  SPU_BTI_UV8HI,  SPU_BTI_UV4SI))
 DEF_BUILTIN (SPU_MHHADD_1,         CODE_FOR_spu_mpyhha,    "spu_mhhadd_1",         B_INTERNAL, _A4(SPU_BTI_V4SI,   SPU_BTI_V8HI,   SPU_BTI_V8HI,   SPU_BTI_V4SI))
@@ -306,8 +306,8 @@  DEF_BUILTIN (SPU_MULO_1,           CODE_FOR_spu_mpyu,      "spu_mulo_1",
 DEF_BUILTIN (SPU_MULO_2,           CODE_FOR_spu_mpy,       "spu_mulo_2",           B_INTERNAL, _A3(SPU_BTI_V4SI,   SPU_BTI_V8HI,   SPU_BTI_INTHI))
 DEF_BUILTIN (SPU_MULO_3,           CODE_FOR_spu_mpyu,      "spu_mulo_3",           B_INTERNAL, _A3(SPU_BTI_UV4SI,  SPU_BTI_UV8HI,  SPU_BTI_UINTHI))
 DEF_BUILTIN (SPU_NMSUB,            CODE_FOR_nothing,       "spu_nmsub",            B_OVERLOAD, _A1(SPU_BTI_VOID))
-DEF_BUILTIN (SPU_NMSUB_0,          CODE_FOR_fnms_v4sf,     "spu_nmsub_0",          B_INTERNAL, _A4(SPU_BTI_V4SF,   SPU_BTI_V4SF,   SPU_BTI_V4SF,   SPU_BTI_V4SF))
-DEF_BUILTIN (SPU_NMSUB_1,          CODE_FOR_fnms_v2df,     "spu_nmsub_1",          B_INTERNAL, _A4(SPU_BTI_V2DF,   SPU_BTI_V2DF,   SPU_BTI_V2DF,   SPU_BTI_V2DF))
+DEF_BUILTIN (SPU_NMSUB_0,          CODE_FOR_fnmav4sf4,     "spu_nmsub_0",          B_INTERNAL, _A4(SPU_BTI_V4SF,   SPU_BTI_V4SF,   SPU_BTI_V4SF,   SPU_BTI_V4SF))
+DEF_BUILTIN (SPU_NMSUB_1,          CODE_FOR_nfmsv2df4,     "spu_nmsub_1",          B_INTERNAL, _A4(SPU_BTI_V2DF,   SPU_BTI_V2DF,   SPU_BTI_V2DF,   SPU_BTI_V2DF))
 DEF_BUILTIN (SPU_SUB,              CODE_FOR_nothing,       "spu_sub",              B_OVERLOAD, _A1(SPU_BTI_VOID))
 DEF_BUILTIN (SPU_SUB_0,            CODE_FOR_subv8hi3,      "spu_sub_0",            B_INTERNAL, _A3(SPU_BTI_UV8HI,  SPU_BTI_UV8HI,  SPU_BTI_UV8HI))
 DEF_BUILTIN (SPU_SUB_1,            CODE_FOR_subv8hi3,      "spu_sub_1",            B_INTERNAL, _A3(SPU_BTI_V8HI,   SPU_BTI_V8HI,   SPU_BTI_V8HI))
@@ -771,8 +771,8 @@  DEF_BUILTIN (SPU_PROMOTE_9,        CODE_FOR_spu_promote,   "spu_promote_9",
 
 /* These are for the convenience of implementing fma() in the standard
    libraries.  */
-DEF_BUILTIN (SCALAR_FMA,           CODE_FOR_fma_sf,        "fmas",                 B_INSN,     _A4(SPU_BTI_FLOAT,  SPU_BTI_FLOAT, SPU_BTI_FLOAT, SPU_BTI_FLOAT))
-DEF_BUILTIN (SCALAR_DFMA,          CODE_FOR_fma_df,        "dfmas",                B_INSN,     _A4(SPU_BTI_DOUBLE, SPU_BTI_DOUBLE, SPU_BTI_DOUBLE, SPU_BTI_DOUBLE))
+DEF_BUILTIN (SCALAR_FMA,           CODE_FOR_fmasf4,        "fmas",                 B_INSN,     _A4(SPU_BTI_FLOAT,  SPU_BTI_FLOAT, SPU_BTI_FLOAT, SPU_BTI_FLOAT))
+DEF_BUILTIN (SCALAR_DFMA,          CODE_FOR_fmadf4,        "dfmas",                B_INSN,     _A4(SPU_BTI_DOUBLE, SPU_BTI_DOUBLE, SPU_BTI_DOUBLE, SPU_BTI_DOUBLE))
 
 DEF_BUILTIN (SPU_ALIGN_HINT,       CODE_FOR_spu_align_hint,"spu_align_hint",       B_INSN,     _A4(SPU_BTI_VOID,   SPU_BTI_PTR,    SPU_BTI_7,      SPU_BTI_7))
 #undef _A1
diff --git a/gcc/config/spu/spu.md b/gcc/config/spu/spu.md
index 181d0db..528a07c 100644
--- a/gcc/config/spu/spu.md
+++ b/gcc/config/spu/spu.md
@@ -748,7 +748,7 @@ 
 
     emit_move_insn (operands[4],
 		    CONST_DOUBLE_FROM_REAL_VALUE (scale, SFmode));
-    emit_insn (gen_fma_sf (operands[0],
+    emit_insn (gen_fmasf4 (operands[0],
 			   operands[2], operands[4], operands[3]));
     DONE;
   })
@@ -1533,69 +1533,98 @@ 
   "<d>fm\t%0,%1,%2"
   [(set_attr "type" "fp<d6>")])
 
-(define_insn "fma_<mode>"
+(define_insn "fma<mode>4"
   [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
-	(plus:VSF (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
-			      (match_operand:VSF 2 "spu_reg_operand" "r"))
-		   (match_operand:VSF 3 "spu_reg_operand" "r")))]
+	(fma:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
+		 (match_operand:VSF 2 "spu_reg_operand" "r")
+		 (match_operand:VSF 3 "spu_reg_operand" "r")))]
   ""
   "fma\t%0,%1,%2,%3"
   [(set_attr "type"	"fp6")])
 
-(define_insn "fnms_<mode>"
+;; ??? The official description is (c - a*b), which is exactly (-a*b + c).
+;; Note that this doesn't match the dfnms description.  Incorrect?
+(define_insn "fnma<mode>4"
   [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
-	(minus:VSF (match_operand:VSF 3 "spu_reg_operand" "r")
-		    (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
-			       (match_operand:VSF 2 "spu_reg_operand" "r"))))]
+	(fma:VSF
+	  (neg:VSF (match_operand:VSF 1 "spu_reg_operand" "r"))
+	  (match_operand:VSF 2 "spu_reg_operand" "r")
+	  (match_operand:VSF 3 "spu_reg_operand" "r")))]
   ""
   "fnms\t%0,%1,%2,%3"
   [(set_attr "type" "fp6")])
 
-(define_insn "fms_<mode>"
+(define_insn "fms<mode>4"
   [(set (match_operand:VSF 0 "spu_reg_operand" "=r")
-	(minus:VSF (mult:VSF (match_operand:VSF 1 "spu_reg_operand" "r")
-			       (match_operand:VSF 2 "spu_reg_operand" "r"))
-		    (match_operand:VSF 3 "spu_reg_operand" "r")))]
+	(fma:VSF
+	  (match_operand:VSF 1 "spu_reg_operand" "r")
+	  (match_operand:VSF 2 "spu_reg_operand" "r")
+	  (neg:VSF (match_operand:VSF 3 "spu_reg_operand" "r"))))]
   ""
   "fms\t%0,%1,%2,%3"
   [(set_attr "type" "fp6")])
 
-(define_insn "fma_<mode>"
+(define_insn "fma<mode>4"
   [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
-	(plus:VDF (mult:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
-			    (match_operand:VDF 2 "spu_reg_operand" "r"))
-		  (match_operand:VDF 3 "spu_reg_operand" "0")))]
+	(fma:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
+		 (match_operand:VDF 2 "spu_reg_operand" "r")
+		 (match_operand:VDF 3 "spu_reg_operand" "0")))]
   ""
   "dfma\t%0,%1,%2"
   [(set_attr "type"	"fpd")])
 
-(define_insn "fnma_<mode>"
+(define_insn "fms<mode>4"
   [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
-	(neg:VDF (plus:VDF (mult:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
-				     (match_operand:VDF 2 "spu_reg_operand" "r"))
-			   (match_operand:VDF 3 "spu_reg_operand" "0"))))]
+	(fma:VDF
+	  (match_operand:VDF 1 "spu_reg_operand" "r")
+	  (match_operand:VDF 2 "spu_reg_operand" "r")
+	  (neg:VDF (match_operand:VDF 3 "spu_reg_operand" "0"))))]
+  ""
+  "dfms\t%0,%1,%2"
+  [(set_attr "type" "fpd")])
+
+(define_insn "nfma<mode>4"
+  [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
+	(neg:VDF
+	  (fma:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
+		   (match_operand:VDF 2 "spu_reg_operand" "r")
+		   (match_operand:VDF 3 "spu_reg_operand" "0"))))]
   ""
   "dfnma\t%0,%1,%2"
   [(set_attr "type"	"fpd")])
 
-(define_insn "fnms_<mode>"
+(define_insn "nfms<mode>4"
   [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
-	(minus:VDF (match_operand:VDF 3 "spu_reg_operand" "0")
-		   (mult:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
-			     (match_operand:VDF 2 "spu_reg_operand" "r"))))]
+	(neg:VDF
+	  (fma:VDF
+	    (match_operand:VDF 1 "spu_reg_operand" "r")
+	    (match_operand:VDF 2 "spu_reg_operand" "r")
+	    (neg:VDF (match_operand:VDF 3 "spu_reg_operand" "0")))))]
   ""
   "dfnms\t%0,%1,%2"
   [(set_attr "type" "fpd")])
 
-(define_insn "fms_<mode>"
-  [(set (match_operand:VDF 0 "spu_reg_operand" "=r")
-	(minus:VDF (mult:VDF (match_operand:VDF 1 "spu_reg_operand" "r")
-			     (match_operand:VDF 2 "spu_reg_operand" "r"))
-		   (match_operand:VDF 3 "spu_reg_operand" "0")))]
-  ""
-  "dfms\t%0,%1,%2"
-  [(set_attr "type" "fpd")])
+;; If signed zeros are ignored, -(a * b - c) = -a * b + c.
+(define_expand "fnma<mode>4"
+  [(set (match_operand:VDF 0 "spu_reg_operand" "")
+	(neg:VDF
+	  (fma:VDF
+	    (match_operand:VDF 1 "spu_reg_operand" "")
+	    (match_operand:VDF 2 "spu_reg_operand" "")
+	    (neg:VDF (match_operand:VDF 3 "spu_reg_operand" "")))))]
+  "!HONOR_SIGNED_ZEROS (<MODE>mode)"
+  "")
 
+;; If signed zeros are ignored, -(a * b + c) = -a * b - c.
+(define_expand "fnms<mode>4"
+  [(set (match_operand:VDF 0 "register_operand" "")
+	(neg:VDF
+	  (fma:VDF
+	    (match_operand:VDF 1 "register_operand" "")
+	    (match_operand:VDF 2 "register_operand" "")
+	    (match_operand:VDF 3 "register_operand" ""))))]
+  "!HONOR_SIGNED_ZEROS (<MODE>mode)"
+  "")
 
 ;; mul highpart, used for divide by constant optimizations.
 
@@ -1845,8 +1874,8 @@ 
     emit_insn (gen_frest_<mode>(operands[3], operands[2]));
     emit_insn (gen_fi_<mode>(operands[3], operands[2], operands[3]));
     emit_insn (gen_mul<mode>3(operands[4], operands[1], operands[3]));
-    emit_insn (gen_fnms_<mode>(operands[0], operands[4], operands[2], operands[1]));
-    emit_insn (gen_fma_<mode>(operands[0], operands[0], operands[3], operands[4]));
+    emit_insn (gen_fnma<mode>4(operands[0], operands[4], operands[2], operands[1]));
+    emit_insn (gen_fma<mode>4(operands[0], operands[0], operands[3], operands[4]));
     DONE;
   })
 
@@ -1870,8 +1899,8 @@ 
     emit_insn (gen_frest_<mode> (operands[3], operands[2]));
     emit_insn (gen_fi_<mode> (operands[3], operands[2], operands[3]));
     emit_insn (gen_mul<mode>3 (operands[4], operands[1], operands[3]));
-    emit_insn (gen_fnms_<mode> (operands[5], operands[4], operands[2], operands[1]));
-    emit_insn (gen_fma_<mode> (operands[3], operands[5], operands[3], operands[4]));
+    emit_insn (gen_fnma<mode>4 (operands[5], operands[4], operands[2], operands[1]));
+    emit_insn (gen_fma<mode>4 (operands[3], operands[5], operands[3], operands[4]));
 
    /* Due to truncation error, the quotient result may be low by 1 ulp.
       Conditionally add one if the estimate is too small in magnitude.  */
@@ -1885,7 +1914,7 @@ 
     emit_insn (gen_add<f2i>3 (gen_lowpart (<F2I>mode, operands[4]),
 			      gen_lowpart (<F2I>mode, operands[3]),
 			      spu_const (<F2I>mode, 1)));
-    emit_insn (gen_fnms_<mode> (operands[0], operands[2], operands[4], operands[1]));
+    emit_insn (gen_fnma<mode>4 (operands[0], operands[2], operands[4], operands[1]));
     emit_insn (gen_mul<mode>3 (operands[0], operands[0], operands[5]));
     emit_insn (gen_cgt_<f2i> (gen_lowpart (<F2I>mode, operands[0]),
 			      gen_lowpart (<F2I>mode, operands[0]),
@@ -1920,8 +1949,8 @@ 
     emit_insn (gen_fi_sf(operands[2],operands[1],operands[2]));
     emit_insn (gen_mulsf3(operands[5],operands[2],operands[1]));
     emit_insn (gen_mulsf3(operands[3],operands[5],operands[3]));
-    emit_insn (gen_fnms_sf(operands[4],operands[2],operands[5],operands[4]));
-    emit_insn (gen_fma_sf(operands[0],operands[4],operands[3],operands[5]));
+    emit_insn (gen_fnmasf4(operands[4],operands[2],operands[5],operands[4]));
+    emit_insn (gen_fmasf4(operands[0],operands[4],operands[3],operands[5]));
     DONE;
   })