===================================================================
@@ -403,8 +403,6 @@
;; Mix-n-match
(define_mode_iterator AVX256MODE2P [V8SI V8SF V4DF])
-(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
-
;; Mapping of immediate bits for blend instructions
(define_mode_attr blendbits
[(V8SF "255") (V4SF "15") (V4DF "15") (V2DF "3")])
@@ -1886,12 +1884,13 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
-;; FMA4 floating point multiply/accumulate instructions. This
-;; includes the scalar version of the instructions as well as the
-;; vector.
+;; FMA floating point multiply/accumulate instructions. These include
+;; scalar versions of the instructions as well as vector versions.
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+(define_mode_iterator FMAMODE [SF DF V4SF V2DF V8SF V4DF])
+
;; In order to match (*a * *b) + *c, particularly when vectorizing, allow
;; combine to generate a multiply/add with two memory references. We then
;; split this insn, into loading up the destination register with one of the
@@ -1907,8 +1906,6 @@
;; We could now properly represent that only one memory operand is
;; allowed and not be penalized during optimization.
-;; Intrinsic FMA operations.
-
;; The standard names for fma is only available with SSE math enabled.
(define_expand "fma<mode>4"
[(set (match_operand:FMAMODE 0 "register_operand")
@@ -1942,7 +1939,7 @@
(neg:FMAMODE (match_operand:FMAMODE 3 "nonimmediate_operand"))))]
"(TARGET_FMA || TARGET_FMA4) && TARGET_SSE_MATH")
-;; The builtin for fma4intrin.h is not constrained by SSE math enabled.
+;; The builtin for intrinsics is not constrained by SSE math enabled.
(define_expand "fma4i_fmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand")
(fma:FMAMODE
@@ -1951,7 +1948,71 @@
(match_operand:FMAMODE 3 "nonimmediate_operand")))]
"TARGET_FMA || TARGET_FMA4")
-(define_insn "*fma4i_fmadd_<mode>"
+;; FMA3 version
+
+(define_insn "*fma_fmadd_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
+ (fma:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
+ "TARGET_FMA"
+ "@
+ vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma_fmsub_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
+ (fma:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
+ (neg:FMAMODE
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
+ "TARGET_FMA"
+ "@
+ vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma_fnmadd_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
+ (fma:FMAMODE
+ (neg:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
+ "TARGET_FMA"
+ "@
+ vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma_fnmsub_<mode>"
+ [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
+ (fma:FMAMODE
+ (neg:FMAMODE
+ (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
+ (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
+ (neg:FMAMODE
+ (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
+ "TARGET_FMA"
+ "@
+ vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; FMA4 version
+
+(define_insn "*fma4_fmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
(fma:FMAMODE
(match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
@@ -1962,7 +2023,7 @@
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fma4i_fmsub_<mode>"
+(define_insn "*fma4_fmsub_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
(fma:FMAMODE
(match_operand:FMAMODE 1 "nonimmediate_operand" "%x,x")
@@ -1974,7 +2035,7 @@
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fma4i_fnmadd_<mode>"
+(define_insn "*fma4_fnmadd_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
(fma:FMAMODE
(neg:FMAMODE
@@ -1986,7 +2047,7 @@
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-(define_insn "*fma4i_fnmsub_<mode>"
+(define_insn "*fma4_fnmsub_<mode>"
[(set (match_operand:FMAMODE 0 "register_operand" "=x,x")
(fma:FMAMODE
(neg:FMAMODE
@@ -1999,23 +2060,89 @@
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
-;; Scalar versions of the above. Unlike ADDSS et al, these write the
-;; entire destination register, with the high-order elements zeroed.
+;; FMA parallel floating point multiply addsub and subadd operations.
-(define_expand "fma4i_vmfmadd_<mode>"
- [(set (match_operand:VF_128 0 "register_operand")
- (vec_merge:VF_128
- (fma:VF_128
- (match_operand:VF_128 1 "nonimmediate_operand")
- (match_operand:VF_128 2 "nonimmediate_operand")
- (match_operand:VF_128 3 "nonimmediate_operand"))
- (match_dup 4)
- (const_int 1)))]
+;; It would be possible to represent these without the UNSPEC as
+;;
+;; (vec_merge
+;; (fma op1 op2 op3)
+;; (fma op1 op2 (neg op3))
+;; (merge-const))
+;;
+;; But this doesn't seem useful in practice.
+
+(define_expand "fmaddsub_<mode>"
+ [(set (match_operand:VF 0 "register_operand")
+ (unspec:VF
+ [(match_operand:VF 1 "nonimmediate_operand")
+ (match_operand:VF 2 "nonimmediate_operand")
+ (match_operand:VF 3 "nonimmediate_operand")]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA || TARGET_FMA4")
+
+;; FMA3 version
+
+(define_insn "*fma_fmaddsub_<mode>"
+ [(set (match_operand:VF 0 "register_operand" "=x,x,x")
+ (unspec:VF
+ [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
+ (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
+ (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA"
+ "@
+ vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+(define_insn "*fma_fmsubadd_<mode>"
+ [(set (match_operand:VF 0 "register_operand" "=x,x,x")
+ (unspec:VF
+ [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
+ (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
+ (neg:VF
+ (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA"
+ "@
+ vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
+ vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
+ vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; FMA4 version
+
+(define_insn "*fma4_fmaddsub_<mode>"
+ [(set (match_operand:VF 0 "register_operand" "=x,x")
+ (unspec:VF
+ [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
+ (match_operand:VF 2 "nonimmediate_operand" " x,m")
+ (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
+ UNSPEC_FMADDSUB))]
"TARGET_FMA4"
-{
- operands[4] = CONST0_RTX (<MODE>mode);
-})
+ "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+(define_insn "*fma4_fmsubadd_<mode>"
+ [(set (match_operand:VF 0 "register_operand" "=x,x")
+ (unspec:VF
+ [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
+ (match_operand:VF 2 "nonimmediate_operand" " x,m")
+ (neg:VF
+ (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
+ UNSPEC_FMADDSUB))]
+ "TARGET_FMA4"
+ "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
+ [(set_attr "type" "ssemuladd")
+ (set_attr "mode" "<MODE>")])
+
+;; FMA3 floating point scalar intrinsics. These merge result with
+;; high-order elements from the destination register.
+
(define_expand "fmai_vmfmadd_<mode>"
[(set (match_operand:VF_128 0 "register_operand")
(vec_merge:VF_128
@@ -2099,6 +2226,23 @@
[(set_attr "type" "ssemuladd")
(set_attr "mode" "<MODE>")])
+;; FMA4 floating point scalar intrinsics. These write the
+;; entire destination register, with the high-order elements zeroed.
+
+(define_expand "fma4i_vmfmadd_<mode>"
+ [(set (match_operand:VF_128 0 "register_operand")
+ (vec_merge:VF_128
+ (fma:VF_128
+ (match_operand:VF_128 1 "nonimmediate_operand")
+ (match_operand:VF_128 2 "nonimmediate_operand")
+ (match_operand:VF_128 3 "nonimmediate_operand"))
+ (match_dup 4)
+ (const_int 1)))]
+ "TARGET_FMA4"
+{
+ operands[4] = CONST0_RTX (<MODE>mode);
+})
+
(define_insn "*fma4i_vmfmadd_<mode>"
[(set (match_operand:VF_128 0 "register_operand" "=x,x")
(vec_merge:VF_128
@@ -2161,152 +2305,6 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
-;; FMA4 Parallel floating point multiply addsub and subadd operations.
-;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-;; It would be possible to represent these without the UNSPEC as
-;;
-;; (vec_merge
-;; (fma op1 op2 op3)
-;; (fma op1 op2 (neg op3))
-;; (merge-const))
-;;
-;; But this doesn't seem useful in practice.
-
-(define_expand "fmaddsub_<mode>"
- [(set (match_operand:VF 0 "register_operand")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand")
- (match_operand:VF 2 "nonimmediate_operand")
- (match_operand:VF 3 "nonimmediate_operand")]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA || TARGET_FMA4")
-
-(define_insn "*fma4_fmaddsub_<mode>"
- [(set (match_operand:VF 0 "register_operand" "=x,x")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
- (match_operand:VF 2 "nonimmediate_operand" " x,m")
- (match_operand:VF 3 "nonimmediate_operand" "xm,x")]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA4"
- "vfmaddsub<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma4_fmsubadd_<mode>"
- [(set (match_operand:VF 0 "register_operand" "=x,x")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "%x,x")
- (match_operand:VF 2 "nonimmediate_operand" " x,m")
- (neg:VF
- (match_operand:VF 3 "nonimmediate_operand" "xm,x"))]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA4"
- "vfmsubadd<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;
-;; FMA3 floating point multiply/accumulate instructions.
-;;
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-
-(define_insn "*fma_fmadd_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
- (fma:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
- (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
- "TARGET_FMA"
- "@
- vfmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fmsub_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
- (fma:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
- (neg:FMAMODE
- (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
- "TARGET_FMA"
- "@
- vfmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fnmadd_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
- (fma:FMAMODE
- (neg:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
- (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
- (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0")))]
- "TARGET_FMA"
- "@
- vfnmadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfnmadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfnmadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fnmsub_<mode>"
- [(set (match_operand:FMAMODE 0 "register_operand" "=x,x,x")
- (fma:FMAMODE
- (neg:FMAMODE
- (match_operand:FMAMODE 1 "nonimmediate_operand" "%0, 0,x"))
- (match_operand:FMAMODE 2 "nonimmediate_operand" "xm, x,xm")
- (neg:FMAMODE
- (match_operand:FMAMODE 3 "nonimmediate_operand" " x,xm,0"))))]
- "TARGET_FMA"
- "@
- vfnmsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfnmsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfnmsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fmaddsub_<mode>"
- [(set (match_operand:VF 0 "register_operand" "=x,x,x")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
- (match_operand:VF 3 "nonimmediate_operand" " x,xm,0")]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA"
- "@
- vfmaddsub132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmaddsub213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmaddsub231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-(define_insn "*fma_fmsubadd_<mode>"
- [(set (match_operand:VF 0 "register_operand" "=x,x,x")
- (unspec:VF
- [(match_operand:VF 1 "nonimmediate_operand" "%0, 0,x")
- (match_operand:VF 2 "nonimmediate_operand" "xm, x,xm")
- (neg:VF
- (match_operand:VF 3 "nonimmediate_operand" " x,xm,0"))]
- UNSPEC_FMADDSUB))]
- "TARGET_FMA"
- "@
- vfmsubadd132<ssemodesuffix>\t{%2, %3, %0|%0, %3, %2}
- vfmsubadd213<ssemodesuffix>\t{%3, %2, %0|%0, %2, %3}
- vfmsubadd231<ssemodesuffix>\t{%2, %1, %0|%0, %1, %2}"
- [(set_attr "type" "ssemuladd")
- (set_attr "mode" "<MODE>")])
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;;
;; Parallel single-precision floating point conversion operations
;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;