diff mbox

PR 46353, fix FMA breakage on powerpc

Message ID 20101111001648.GA16293@hungry-tiger.westford.ibm.com
State New
Headers show

Commit Message

Michael Meissner Nov. 11, 2010, 12:16 a.m. UTC
Richard Henderson is evidently working on a system-wide cleanup of the FMA
issues.

This particular patch fixes the FMA issues on the powerpc.  Note, I don't seem
to be able to test the paired patches at this point, so I'm providing two
patches.  One patch fixes the normal power/vsx/altivec side of things.  It
fixes the ppc-fma-*.c and ppc-fmadd-*.c tests that were broken, and it causes no
other regressions.  The second patch adds the same changes to the paired.md file.

Ideally, my patch and Richard's will be harmonized before being checked in.  It
would be useful if somebody who has a linuxpaired setup could check the paired
changes on their system.

2010-11-10  Michael Meissner  <meissner@linux.vnet.ibm.com>

	PR target/46353
	* config/rs6000/vector.md (fma<mode>4): Define vector versions of
	the fma patterns for Altivec, VSX, and paired floating point.
	(fms<mode>4): Ditto.
	(fnma<mode>4): Ditto.
	(fnms<mode>4): Ditto.

	* config/rs6000/rs6000.opt (-mfused-madd): Make -mfused-madd an
	alias for -ffp-contract=fast, and -mno-fused-madd an alias for
	-ffp-contract=off.

	* config/rs6000/vsx.md (vsx_fnmadd<mode>4_3): New combiner insn
	for fused multiply/add with the multiply negated for use with
	-ffast-math.
	(vsx_fnmsub<mode>4_3): Ditto.
	* config/rs6000/altivec.md (altivec_vnmsubfp_3): Ditto.

	* config/rs6000/rs6000.h (TARGET_FUSED_MADD): Define in terms of
	-ffp-contract.

	* config/rs6000/rs6000.md (fmssf4): Rename to use standard name
	for fused multiply/add instructions.  Add -ffast-math support for
	negating the multiply during the fma.
	(fnmasf4): Ditto.
	(fnmssf4): Ditto.
	(fmadf4_fpr): Use 'd' constraint for double, not 'f'.
	(fmsdf4): Use 'd' constraint for double, not 'f'.  Use standard
	name for fused multiply/add instructions.
	(fnmadf4): Ditto.
	(fnmadf4_fpr_1): Ditto.
	(fnmsdf4): Ditto.
	(fnmsdf4_fpr_1): Ditto.
	(fnmasf4_2): New combiner insn for fused multiply/add with the
	multiply negated for use with -ffast-math.
	(fnmssf4_2): Ditto.
	(fnmadf4_fpr_2): Ditto.
	(fnmsdf4_fpr_2): Ditto.

	* config/rs6000/paired.md (paired_madd): Rewrite to use fma.
	(paired_msub): Ditto.
	(paired_madd_2): Rename combiner pattern.
	(paired_msub_2): Ditto.
	(paired_nmadd_2): New pattern to use fma.
	(paired_nmadd_3): Ditto.
	(paired_nmsub_2): Ditto.
	(paired_nmsub_3): Ditto.
diff mbox

Patch

Index: gcc/config/rs6000/vector.md
===================================================================
--- gcc/config/rs6000/vector.md	(revision 166510)
+++ gcc/config/rs6000/vector.md	(working copy)
@@ -233,6 +233,38 @@  (define_expand "neg<mode>2"
     }
 }")
 
+(define_expand "fma<mode>4"
+  [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+	(fma:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")
+		   (match_operand:VEC_F 2 "vfloat_operand" "")
+		   (match_operand:VEC_F 3 "vfloat_operand" "")))]
+  "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
+  "")
+
+(define_expand "fms<mode>4"
+  [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+	(fma:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")
+		   (match_operand:VEC_F 2 "vfloat_operand" "")
+		   (neg:VEC_F (match_operand:VEC_F 3 "vfloat_operand" ""))))]
+  "VECTOR_UNIT_VSX_P (<MODE>mode)"
+  "")
+
+(define_expand "fnma<mode>4"
+  [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+	(fma:VEC_F (neg:VEC_F (match_operand:VEC_F 1 "vfloat_operand" ""))
+		   (match_operand:VEC_F 2 "vfloat_operand" "")
+		   (match_operand:VEC_F 3 "vfloat_operand" "")))]
+  "VECTOR_UNIT_VSX_P (<MODE>mode) && !HONOR_SIGNED_ZEROS (<MODE>mode)"
+  "")
+
+(define_expand "fnms<mode>4"
+  [(set (match_operand:VEC_F 0 "vfloat_operand" "")
+	(fma:VEC_F (neg:VEC_F (match_operand:VEC_F 1 "vfloat_operand" ""))
+		   (match_operand:VEC_F 2 "vfloat_operand" "")
+		   (neg:VEC_F (match_operand:VEC_F 3 "vfloat_operand" ""))))]
+  "VECTOR_UNIT_VSX_P (<MODE>mode) && !HONOR_SIGNED_ZEROS (<MODE>mode)"
+  "")
+
 (define_expand "abs<mode>2"
   [(set (match_operand:VEC_F 0 "vfloat_operand" "")
 	(abs:VEC_F (match_operand:VEC_F 1 "vfloat_operand" "")))]
Index: gcc/config/rs6000/rs6000.opt
===================================================================
--- gcc/config/rs6000/rs6000.opt	(revision 166510)
+++ gcc/config/rs6000/rs6000.opt	(working copy)
@@ -177,8 +177,8 @@  Target Report Var(TARGET_AVOID_XFORM) In
 Avoid generation of indexed load/store instructions when possible
 
 mfused-madd
-Target Report Var(TARGET_FUSED_MADD) Init(1)
-Generate fused multiply/add instructions
+Target Alias(ffp-contract=, fast, off)
+Generate fused multiply/add instructions.  Same as -ffp-contract=fast
 
 mtls-markers
 Target Report Var(tls_markers) Init(1)
Index: gcc/config/rs6000/vsx.md
===================================================================
--- gcc/config/rs6000/vsx.md	(revision 166510)
+++ gcc/config/rs6000/vsx.md	(working copy)
@@ -666,6 +666,23 @@  (define_insn "vsx_fnmadd<mode>4_2"
   [(set_attr "type" "<VStype_mul>")
    (set_attr "fp_type" "<VSfptype_mul>")])
 
+(define_insn "*vsx_fnmadd<mode>4_3"
+  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
+	(fma:VSX_B
+	 (neg:VSX_B
+	  (match_operand:VSX_B 1 "vsx_register_operand" "%<VSr>,<VSr>,wa,wa"))
+	 (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
+	 (neg:VSX_B
+	  (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa"))))]
+  "VECTOR_UNIT_VSX_P (<MODE>mode) && !HONOR_SIGNED_ZEROS (<MODE>mode)"
+  "@
+   x<VSv>nmadda<VSs> %x0,%x1,%x2
+   x<VSv>nmaddm<VSs> %x0,%x1,%x3
+   x<VSv>nmadda<VSs> %x0,%x1,%x2
+   x<VSv>nmaddm<VSs> %x0,%x1,%x3"
+  [(set_attr "type" "<VStype_mul>")
+   (set_attr "fp_type" "<VSfptype_mul>")])
+
 (define_insn "vsx_fnmsub<mode>4"
   [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
 	(neg:VSX_B
@@ -717,6 +734,22 @@  (define_insn "vsx_fnmsub<mode>4_2"
   [(set_attr "type" "<VStype_mul>")
    (set_attr "fp_type" "<VSfptype_mul>")])
 
+(define_insn "*vsx_fnmsub<mode>4_3"
+  [(set (match_operand:VSX_B 0 "vsx_register_operand" "=<VSr>,<VSr>,?wa,?wa")
+	 (fma:VSX_B
+	  (neg:VSX_B
+	   (match_operand:VSX_B 1 "vsx_register_operand" "<VSr>,<VSr>,wa,wa"))
+	  (match_operand:VSX_B 2 "vsx_register_operand" "<VSr>,0,wa,0")
+	  (match_operand:VSX_B 3 "vsx_register_operand" "0,<VSr>,0,wa")))]
+  "VECTOR_UNIT_VSX_P (<MODE>mode) && !HONOR_SIGNED_ZEROS (<MODE>mode)"
+  "@
+   x<VSv>nmsuba<VSs> %x0,%x1,%x2
+   x<VSv>nmsubm<VSs> %x0,%x1,%x3
+   x<VSv>nmsuba<VSs> %x0,%x1,%x2
+   x<VSv>nmsubm<VSs> %x0,%x1,%x3"
+  [(set_attr "type" "<VStype_mul>")
+   (set_attr "fp_type" "<VSfptype_mul>")])
+
 ;; Vector conditional expressions (no scalar version for these instructions)
 (define_insn "vsx_eq<mode>"
   [(set (match_operand:VSX_F 0 "vsx_register_operand" "=<VSr>,?wa")
Index: gcc/config/rs6000/rs6000.h
===================================================================
--- gcc/config/rs6000/rs6000.h	(revision 166510)
+++ gcc/config/rs6000/rs6000.h	(working copy)
@@ -597,6 +597,9 @@  extern int rs6000_vector_align[];
 			 && TARGET_DOUBLE_FLOAT \
 			 && (TARGET_PPC_GFXOPT || VECTOR_UNIT_VSX_P (DFmode)))
 
+/* Old -mno-fused-madd is now -ffp-contract=off.  */
+#define TARGET_FUSED_MADD (flag_fp_contract_mode != FP_CONTRACT_OFF)
+
 /* Whether the various reciprocal divide/square root estimate instructions
    exist, and whether we should automatically generate code for the instruction
    by default.  */
Index: gcc/config/rs6000/rs6000.md
===================================================================
--- gcc/config/rs6000/rs6000.md	(revision 166510)
+++ gcc/config/rs6000/rs6000.md	(working copy)
@@ -5870,7 +5870,7 @@  (define_insn "fmasf4_fpr"
   [(set_attr "type" "fp")
    (set_attr "fp_type" "fp_maddsub_s")])
 
-(define_insn "*fmssf4_fpr"
+(define_insn "fmssf4"
   [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
 	(fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
 		(match_operand:SF 2 "gpc_reg_operand" "f")
@@ -5885,7 +5885,28 @@  (define_insn "*fmssf4_fpr"
   [(set_attr "type" "fp")
    (set_attr "fp_type" "fp_maddsub_s")])
 
-(define_insn "*fnmasf4_fpr"
+;; Note, FNMA is defined to be (-a * b) + c, and FNMS is defined to be
+;; (-a * b) - c.  The PowerPC defines its FNMADD operation as -((a*b)+c)
+;; and FNMSUB as -((a*b)-c), so for FNMA we need to emit a FNMSUB and for
+;; FNMS we need to emit a FNMADD.  And signed zeros are also a possibility
+;; so make this fast-math only
+(define_insn "fnmasf4"
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+	(fma:SF (neg:SF (match_operand:SF 1 "gpc_reg_operand" "f"))
+		(match_operand:SF 2 "gpc_reg_operand" "f")
+		(match_operand:SF 3 "gpc_reg_operand" "f")))]
+  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT
+   && !HONOR_SIGNED_ZEROS (SFmode)"
+  "*
+{
+  return ((TARGET_POWERPC)
+	  ? \"fnmsubs %0,%1,%2,%3\"
+	  : \"{fnms|fnmsub} %0,%1,%2,%3\");
+}"
+  [(set_attr "type" "fp")
+   (set_attr "fp_type" "fp_maddsub_s")])
+
+(define_insn "*fnmasf4_2"
   [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
 	(neg:SF (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
 			(match_operand:SF 2 "gpc_reg_operand" "f")
@@ -5900,7 +5921,23 @@  (define_insn "*fnmasf4_fpr"
   [(set_attr "type" "fp")
    (set_attr "fp_type" "fp_maddsub_s")])
 
-(define_insn "*fnmssf4_fpr"
+(define_insn "fnmssf4"
+  [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
+	(fma:SF (neg:SF (match_operand:SF 1 "gpc_reg_operand" "f"))
+		(match_operand:SF 2 "gpc_reg_operand" "f")
+		(neg:SF (match_operand:SF 3 "gpc_reg_operand" "f"))))]
+  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT
+   && !HONOR_SIGNED_ZEROS (SFmode)"
+  "*
+{
+  return ((TARGET_POWERPC)
+	  ? \"fnmadds %0,%1,%2,%3\"
+	  : \"{fnma|fnmadd} %0,%1,%2,%3\");
+}"
+  [(set_attr "type" "fp")
+   (set_attr "fp_type" "fp_maddsub_s")])
+
+(define_insn "*fnmssf4_2"
   [(set (match_operand:SF 0 "gpc_reg_operand" "=f")
 	(neg:SF (fma:SF (match_operand:SF 1 "gpc_reg_operand" "f")
 			(match_operand:SF 2 "gpc_reg_operand" "f")
@@ -6396,43 +6433,96 @@  (define_expand "fmadf4"
   "")
 
 (define_insn "fmadf4_fpr"
-  [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
-	(fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
-		(match_operand:DF 2 "gpc_reg_operand" "f")
-		(match_operand:DF 3 "gpc_reg_operand" "f")))]
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+	(fma:DF (match_operand:DF 1 "gpc_reg_operand" "d")
+		(match_operand:DF 2 "gpc_reg_operand" "d")
+		(match_operand:DF 3 "gpc_reg_operand" "d")))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
    && VECTOR_UNIT_NONE_P (DFmode)"
   "{fma|fmadd} %0,%1,%2,%3"
   [(set_attr "type" "fp")
    (set_attr "fp_type" "fp_maddsub_s")])
 
+(define_expand "fmsdf4"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "")
+	(fma:DF (match_operand:DF 1 "gpc_reg_operand" "")
+		(match_operand:DF 2 "gpc_reg_operand" "")
+		(neg:DF (match_operand:DF 3 "gpc_reg_operand" ""))))]
+  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT"
+  "")
+
 (define_insn "*fmsdf4_fpr"
-  [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
-	(fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
-		(match_operand:DF 2 "gpc_reg_operand" "f")
-		(neg:DF (match_operand:DF 3 "gpc_reg_operand" "f"))))]
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+	(fma:DF (match_operand:DF 1 "gpc_reg_operand" "d")
+		(match_operand:DF 2 "gpc_reg_operand" "d")
+		(neg:DF (match_operand:DF 3 "gpc_reg_operand" "d"))))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
    && VECTOR_UNIT_NONE_P (DFmode)"
   "{fms|fmsub} %0,%1,%2,%3"
   [(set_attr "type" "fp")
    (set_attr "fp_type" "fp_maddsub_s")])
 
-(define_insn "*fnmadf4_fpr"
-  [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
-	(neg:DF (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
-			(match_operand:DF 2 "gpc_reg_operand" "f")
-			(match_operand:DF 3 "gpc_reg_operand" "f"))))]
+;; Note, FNMA is defined to be (-a * b) + c, and FNMS is defined to be
+;; (-a * b) - c.  The PowerPC defines its FNMADD operation as -((a*b)+c)
+;; and FNMSUB as -((a*b)-c), so for FNMA we need to emit a FNMSUB and for
+;; FNMS we need to emit a FNMADD.  And signed zeros are also a possibility
+;; so make this fast-math only
+(define_expand "fnmadf4"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "")
+	(fma:DF (neg:DF (match_operand:DF 1 "gpc_reg_operand" ""))
+		(match_operand:DF 2 "gpc_reg_operand" "")
+		(match_operand:DF 3 "gpc_reg_operand" "")))]
+  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+   && !HONOR_SIGNED_ZEROS (DFmode)"
+  "")
+
+(define_insn "*fnmadf4_fpr_1"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+	(fma:DF (neg:DF (match_operand:DF 1 "gpc_reg_operand" "d"))
+		(match_operand:DF 2 "gpc_reg_operand" "d")
+		(match_operand:DF 3 "gpc_reg_operand" "d")))]
+  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+   && VECTOR_UNIT_NONE_P (DFmode) && !HONOR_SIGNED_ZEROS (DFmode)"
+  "{fnms|fnmsub} %0,%1,%2,%3"
+  [(set_attr "type" "fp")
+   (set_attr "fp_type" "fp_maddsub_s")])
+
+(define_insn "*fnmadf4_fpr_2"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+	(neg:DF (fma:DF (match_operand:DF 1 "gpc_reg_operand" "d")
+			(match_operand:DF 2 "gpc_reg_operand" "d")
+			(match_operand:DF 3 "gpc_reg_operand" "d"))))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
    && VECTOR_UNIT_NONE_P (DFmode)"
   "{fnma|fnmadd} %0,%1,%2,%3"
   [(set_attr "type" "fp")
    (set_attr "fp_type" "fp_maddsub_s")])
 
-(define_insn "*fnmsdf4_fpr"
-  [(set (match_operand:DF 0 "gpc_reg_operand" "=f")
-	(neg:DF (fma:DF (match_operand:DF 1 "gpc_reg_operand" "f")
-			(match_operand:DF 2 "gpc_reg_operand" "f")
-			(neg:DF (match_operand:DF 3 "gpc_reg_operand" "f")))))]
+(define_expand "fnmsdf4"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "")
+	(fma:DF (neg:DF (match_operand:DF 1 "gpc_reg_operand" ""))
+		(match_operand:DF 2 "gpc_reg_operand" "")
+		(neg:DF (match_operand:DF 3 "gpc_reg_operand" ""))))]
+  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+   && !HONOR_SIGNED_ZEROS (DFmode)"
+  "")
+
+(define_insn "*fnmsdf4_fpr_1"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+	(fma:DF (neg:DF (match_operand:DF 1 "gpc_reg_operand" "d"))
+		(match_operand:DF 2 "gpc_reg_operand" "d")
+		(neg:DF (match_operand:DF 3 "gpc_reg_operand" "d"))))]
+  "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
+   && VECTOR_UNIT_NONE_P (DFmode) && !HONOR_SIGNED_ZEROS (DFmode)"
+  "{fnma|fnmadd} %0,%1,%2,%3"
+  [(set_attr "type" "fp")
+   (set_attr "fp_type" "fp_maddsub_s")])
+
+(define_insn "*fnmsdf4_fpr_2"
+  [(set (match_operand:DF 0 "gpc_reg_operand" "=d")
+	(neg:DF (fma:DF (match_operand:DF 1 "gpc_reg_operand" "d")
+			(match_operand:DF 2 "gpc_reg_operand" "d")
+			(neg:DF (match_operand:DF 3 "gpc_reg_operand" "d")))))]
   "TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
    && VECTOR_UNIT_NONE_P (DFmode)"
   "{fnms|fnmsub} %0,%1,%2,%3"