[committed,AArch64] Add a "GP strictness" operand to SVE FP unspecs
diff mbox series

Message ID mpto90sp3dm.fsf@arm.com
State New
Headers show
Series
  • [committed,AArch64] Add a "GP strictness" operand to SVE FP unspecs
Related show

Commit Message

Richard Sandiford Aug. 14, 2019, 8:17 a.m. UTC
This patch makes the SVE unary, binary and ternary FP unspecs
take a new "GP strictness" operand that indicates whether the
predicate has to be taken literally, or whether it is valid to
make extra lanes active (up to and including using a PTRUE).

This again is laying the groundwork for the ACLE patterns,
in which the value can depend on the FP command-line flags.

At the moment it's only needed for addition, subtraction and
multiplication, which have unpredicated forms that can only
be used when operating on all lanes is safe.  But in future
it might be useful for optimising predicate usage.

The strict mode requires extra alternatives for addition,
subtraction and multiplication, but I've left those for the
main ACLE patch.

Tested on aarch64-linux-gnu (with and without SVE) and aarch64_be-elf.
Applied as r274418.

Richard


2019-08-14  Richard Sandiford  <richard.sandiford@arm.com>
	    Kugan Vivekanandarajah  <kugan.vivekanandarajah@linaro.org>

gcc/
	* config/aarch64/aarch64.md (SVE_RELAXED_GP, SVE_STRICT_GP): New
	constants.
	* config/aarch64/predicates.md (aarch64_sve_gp_strictness): New
	predicate.
	* config/aarch64/aarch64-protos.h (aarch64_sve_pred_dominates_p):
	Declare.
	* config/aarch64/aarch64.c (aarch64_sve_pred_dominates_p): New
	function.
	* config/aarch64/aarch64-sve.md: Add a block comment about the
	handling of predicated FP operations.
	(<SVE_COND_FP_UNARY:optab><SVE_F:mode>2, add<SVE_F:mode>3)
	(sub<SVE_F:mode>3, mul<SVE_F:mode>3, div<SVE_F:mode>3)
	(<SVE_COND_FP_MAXMIN_PUBLIC:optab><SVE_F:mode>3)
	(<SVE_COND_FP_MAXMIN_PUBLIC:maxmin_uns><SVE_F:mode>3)
	(<SVE_COND_FP_TERNARY:optab><SVE_F:mode>4): Add an SVE_RELAXED_GP
	operand.
	(cond_<SVE_COND_FP_BINARY:optab><SVE_F:mode>)
	(cond_<SVE_COND_FP_TERNARY:optab><SVE_F:mode>): Add an SVE_STRICT_GP
	operand.
	(*<SVE_COND_FP_UNARY:optab><SVE_F:mode>2)
	(*cond_<SVE_COND_FP_BINARY:optab><SVE_F:mode>_2)
	(*cond_<SVE_COND_FP_BINARY:optab><SVE_F:mode>_3)
	(*cond_<SVE_COND_FP_BINARY:optab><SVE_F:mode>_any)
	(*fabd<SVE_F:mode>3, *div<SVE_F:mode>3)
	(*<SVE_COND_FP_MAXMIN_PUBLIC:optab><SVE_F:mode>3)
	(*<SVE_COND_FP_TERNARY:optab><SVE_F:mode>4)
	(*cond_<SVE_COND_FP_TERNARY:optab><SVE_F:mode>_2)
	(*cond_<SVE_COND_FP_TERNARY:optab><SVE_F:mode>_4)
	(*cond_<SVE_COND_FP_TERNARY:optab><SVE_F:mode>_any): Match the
	strictness operands.  Use aarch64_sve_pred_dominates_p to check
	whether the predicate on the conditional operation is suitable
	for merging.  Split patterns into the canonical equal-predicate form.
	(*add<SVE_F:mode>3, *sub<SVE_F:mode>3, *mul<SVE_F:mode>3): Likewise.
	Restrict the unpredicated alternatives to SVE_RELAXED_GP.

Patch
diff mbox series

Index: gcc/config/aarch64/aarch64.md
===================================================================
--- gcc/config/aarch64/aarch64.md	2019-08-14 08:58:06.357767418 +0100
+++ gcc/config/aarch64/aarch64.md	2019-08-14 09:13:55.210734712 +0100
@@ -268,6 +268,18 @@  (define_constants
    ; Indicates that the predicate is known to be a PTRUE.
    (SVE_KNOWN_PTRUE 1)])
 
+;; These constants are used as a const_int in predicated SVE FP arithmetic
+;; to indicate whether the operation is allowed to make additional lanes
+;; active without worrying about the effect on faulting behavior.
+(define_constants
+  [; Indicates either that all lanes are active or that the instruction may
+   ; operate on inactive inputs even if doing so could induce a fault.
+   (SVE_RELAXED_GP 0)
+
+   ; Indicates that some lanes might be inactive and that the instruction
+   ; must not operate on inactive inputs if doing so could induce a fault.
+   (SVE_STRICT_GP 1)])
+
 ;; If further include files are added the defintion of MD_INCLUDES
 ;; must be updated.
 
Index: gcc/config/aarch64/predicates.md
===================================================================
--- gcc/config/aarch64/predicates.md	2019-08-14 08:58:06.357767418 +0100
+++ gcc/config/aarch64/predicates.md	2019-08-14 09:13:55.210734712 +0100
@@ -689,6 +689,11 @@  (define_predicate "aarch64_sve_ptrue_fla
        (ior (match_test "INTVAL (op) == SVE_MAYBE_NOT_PTRUE")
 	    (match_test "INTVAL (op) == SVE_KNOWN_PTRUE"))))
 
+(define_predicate "aarch64_sve_gp_strictness"
+  (and (match_code "const_int")
+       (ior (match_test "INTVAL (op) == SVE_RELAXED_GP")
+	    (match_test "INTVAL (op) == SVE_STRICT_GP"))))
+
 (define_predicate "aarch64_gather_scale_operand_w"
   (and (match_code "const_int")
        (match_test "INTVAL (op) == 1 || INTVAL (op) == 4")))
Index: gcc/config/aarch64/aarch64-protos.h
===================================================================
--- gcc/config/aarch64/aarch64-protos.h	2019-08-14 08:58:06.349767478 +0100
+++ gcc/config/aarch64/aarch64-protos.h	2019-08-14 09:13:55.206734742 +0100
@@ -554,6 +554,7 @@  rtx aarch64_ptrue_all (unsigned int);
 void aarch64_expand_mov_immediate (rtx, rtx);
 rtx aarch64_ptrue_reg (machine_mode);
 rtx aarch64_pfalse_reg (machine_mode);
+bool aarch64_sve_pred_dominates_p (rtx *, rtx);
 void aarch64_emit_sve_pred_move (rtx, rtx, rtx);
 void aarch64_expand_sve_mem_move (rtx, rtx, machine_mode);
 bool aarch64_maybe_expand_sve_subreg_move (rtx, rtx);
Index: gcc/config/aarch64/aarch64.c
===================================================================
--- gcc/config/aarch64/aarch64.c	2019-08-14 09:03:20.523438266 +0100
+++ gcc/config/aarch64/aarch64.c	2019-08-14 09:13:55.210734712 +0100
@@ -2765,6 +2765,24 @@  aarch64_pfalse_reg (machine_mode mode)
   return gen_lowpart (mode, reg);
 }
 
+/* Return true if predicate PRED1[0] is true whenever predicate PRED2 is
+   true, or alternatively if we know that the operation predicated by
+   PRED1[0] is safe to perform whenever PRED2 is true.  PRED1[1] is an
+   aarch64_sve_gp_strictness operand that describes the operation
+   predicated by PRED1[0].  */
+
+bool
+aarch64_sve_pred_dominates_p (rtx *pred1, rtx pred2)
+{
+  machine_mode mode = GET_MODE (pred2);
+  gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL
+	      && mode == GET_MODE (pred1[0])
+	      && aarch64_sve_gp_strictness (pred1[1], SImode));
+  return (pred1[0] == CONSTM1_RTX (mode)
+	  || INTVAL (pred1[1]) == SVE_RELAXED_GP
+	  || rtx_equal_p (pred1[0], pred2));
+}
+
 /* Use a comparison to convert integer vector SRC into MODE, which is
    the corresponding SVE predicate mode.  Use TARGET for the result
    if it's nonnull and convenient.  */
Index: gcc/config/aarch64/aarch64-sve.md
===================================================================
--- gcc/config/aarch64/aarch64-sve.md	2019-08-14 09:11:39.879737400 +0100
+++ gcc/config/aarch64/aarch64-sve.md	2019-08-14 09:13:55.206734742 +0100
@@ -24,6 +24,7 @@ 
 ;; == General notes
 ;; ---- Note on the handling of big-endian SVE
 ;; ---- Description of UNSPEC_PTEST
+;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
 ;;
 ;; == Moves
 ;; ---- Moves of single vectors
@@ -228,6 +229,83 @@ 
 ;;   SVE_MAYBE_NOT_PTRUE otherwise.
 ;;
 ;; - OP is the predicate we want to test, of the same mode as CAST_GP.
+;;
+;; -------------------------------------------------------------------------
+;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
+;; -------------------------------------------------------------------------
+;;
+;; Most SVE floating-point operations are predicated.  We can generate
+;; them from four sources:
+;;
+;; (1) Using normal unpredicated optabs.  In this case we need to create
+;;     an all-true predicate register to act as the governing predicate
+;;     for the SVE instruction.  There are no inactive lanes, and thus
+;;     the values of inactive lanes don't matter.
+;;
+;; (2) Using _x ACLE functions.  In this case the function provides a
+;;     specific predicate and some lanes might be inactive.  However,
+;;     as for (1), the values of the inactive lanes don't matter.
+;;
+;;     The instruction must have the same exception behavior as the
+;;     function call unless things like command-line flags specifically
+;;     allow otherwise.  For example, with -ffast-math, it is OK to
+;;     raise exceptions for inactive lanes, but normally it isn't.
+;;
+;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
+;;     These optabs have a predicate operand that specifies which lanes are
+;;     active and another operand that provides the values of inactive lanes.
+;;
+;; (4) Using _m and _z ACLE functions.  These functions map to the same
+;;     patterns as (3), with the _z functions setting inactive lanes to zero
+;;     and the _m functions setting the inactive lanes to one of the function
+;;     arguments.
+;;
+;; So:
+;;
+;; - In (1), the predicate is known to be all true and the pattern can use
+;;   unpredicated operations where available.
+;;
+;; - In (2), the predicate might or might not be all true.  The pattern can
+;;   use unpredicated instructions if the predicate is all-true or if things
+;;   like command-line flags allow exceptions for inactive lanes.
+;;
+;; - (3) and (4) represent a native SVE predicated operation.  Some lanes
+;;   might be inactive and inactive lanes of the result must have specific
+;;   values.  There is no scope for using unpredicated instructions (and no
+;;   reason to want to), so the question about command-line flags doesn't
+;;   arise.
+;;
+;; It would be inaccurate to model (2) as an rtx code like (sqrt ...)
+;; in combination with a separate predicate operand, e.g.
+;;
+;;   (unspec [(match_operand:<VPRED> 1 "register_operand" "Upl")
+;;	      (sqrt:SVE_F (match_operand:SVE_F 2 "register_operand" "w"))]
+;;	     ....)
+;;
+;; because (sqrt ...) can raise an exception for any lane, including
+;; inactive ones.  We therefore need to use an unspec instead.
+;;
+;; Also, (2) requires some way of distinguishing the case in which the
+;; predicate might have inactive lanes and cannot be changed from the
+;; case in which the predicate has no inactive lanes or can be changed.
+;; This information is also useful when matching combined FP patterns
+;; in which the predicates might not be equal.
+;;
+;; We therefore model FP operations as an unspec of the form:
+;;
+;;   (unspec [pred strictness op0 op1 ...] UNSPEC_COND_<MNEMONIC>)
+;;
+;; where:
+;;
+;; - PRED is the governing predicate.
+;;
+;; - STRICTNESS is a CONST_INT that conceptually has mode SI.  It has the
+;;   value SVE_STRICT_GP if PRED might have inactive lanes and if those
+;;   lanes must remain inactive.  It has the value SVE_RELAXED_GP otherwise.
+;;
+;; - OP0 OP1 ... are the normal input operands to the operation.
+;;
+;; - MNEMONIC is the mnemonic of the associated SVE instruction.
 
 ;; =========================================================================
 ;; == Moves
@@ -1290,6 +1368,7 @@  (define_expand "<optab><mode>2"
   [(set (match_operand:SVE_F 0 "register_operand")
 	(unspec:SVE_F
 	  [(match_dup 2)
+	   (const_int SVE_RELAXED_GP)
 	   (match_operand:SVE_F 1 "register_operand")]
 	  SVE_COND_FP_UNARY))]
   "TARGET_SVE"
@@ -1303,6 +1382,7 @@  (define_insn "*<optab><mode>2"
   [(set (match_operand:SVE_F 0 "register_operand" "=w")
 	(unspec:SVE_F
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
 	   (match_operand:SVE_F 2 "register_operand" "w")]
 	  SVE_COND_FP_UNARY))]
   "TARGET_SVE"
@@ -1964,6 +2044,7 @@  (define_expand "cond_<optab><mode>"
 	  [(match_operand:<VPRED> 1 "register_operand")
 	   (unspec:SVE_F
 	     [(match_dup 1)
+	      (const_int SVE_STRICT_GP)
 	      (match_operand:SVE_F 2 "register_operand")
 	      (match_operand:SVE_F 3 "register_operand")]
 	     SVE_COND_FP_BINARY)
@@ -1973,40 +2054,50 @@  (define_expand "cond_<optab><mode>"
 )
 
 ;; Predicated floating-point operations, merging with the first input.
-(define_insn "*cond_<optab><mode>_2"
+(define_insn_and_rewrite "*cond_<optab><mode>_2"
   [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
 	(unspec:SVE_F
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
 	   (unspec:SVE_F
-	     [(match_dup 1)
+	     [(match_operand 4)
+	      (match_operand:SI 5 "aarch64_sve_gp_strictness")
 	      (match_operand:SVE_F 2 "register_operand" "0, w")
 	      (match_operand:SVE_F 3 "register_operand" "w, w")]
 	     SVE_COND_FP_BINARY)
 	   (match_dup 2)]
 	  UNSPEC_SEL))]
-  "TARGET_SVE"
+  "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
   "@
    <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
    movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>"
+  "&& !rtx_equal_p (operands[1], operands[4])"
+  {
+    operands[4] = copy_rtx (operands[1]);
+  }
   [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Predicated floating-point operations, merging with the second input.
-(define_insn "*cond_<optab><mode>_3"
+(define_insn_and_rewrite "*cond_<optab><mode>_3"
   [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
 	(unspec:SVE_F
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
 	   (unspec:SVE_F
-	     [(match_dup 1)
+	     [(match_operand 4)
+	      (match_operand:SI 5 "aarch64_sve_gp_strictness")
 	      (match_operand:SVE_F 2 "register_operand" "w, w")
 	      (match_operand:SVE_F 3 "register_operand" "0, w")]
 	     SVE_COND_FP_BINARY)
 	   (match_dup 3)]
 	  UNSPEC_SEL))]
-  "TARGET_SVE"
+  "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[4], operands[1])"
   "@
    <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
    movprfx\t%0, %3\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>"
+  "&& !rtx_equal_p (operands[1], operands[4])"
+  {
+    operands[4] = copy_rtx (operands[1]);
+  }
   [(set_attr "movprfx" "*,yes")]
 )
 
@@ -2016,7 +2107,8 @@  (define_insn_and_rewrite "*cond_<optab><
 	(unspec:SVE_F
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl, Upl")
 	   (unspec:SVE_F
-	     [(match_dup 1)
+	     [(match_operand 5)
+	      (match_operand:SI 6 "aarch64_sve_gp_strictness")
 	      (match_operand:SVE_F 2 "register_operand" "0, w, w, w, w")
 	      (match_operand:SVE_F 3 "register_operand" "w, 0, w, w, w")]
 	     SVE_COND_FP_BINARY)
@@ -2024,20 +2116,28 @@  (define_insn_and_rewrite "*cond_<optab><
 	  UNSPEC_SEL))]
   "TARGET_SVE
    && !rtx_equal_p (operands[2], operands[4])
-   && !rtx_equal_p (operands[3], operands[4])"
+   && !rtx_equal_p (operands[3], operands[4])
+   && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
   "@
    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
    movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
    movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
    movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
    #"
-  "&& reload_completed
-   && register_operand (operands[4], <MODE>mode)
-   && !rtx_equal_p (operands[0], operands[4])"
+  "&& 1"
   {
-    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
-					     operands[4], operands[1]));
-    operands[4] = operands[2] = operands[0];
+    if (reload_completed
+        && register_operand (operands[4], <MODE>mode)
+        && !rtx_equal_p (operands[0], operands[4]))
+      {
+	emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
+						 operands[4], operands[1]));
+	operands[4] = operands[2] = operands[0];
+      }
+    else if (!rtx_equal_p (operands[1], operands[5]))
+      operands[5] = copy_rtx (operands[1]);
+    else
+      FAIL;
   }
   [(set_attr "movprfx" "yes")]
 )
@@ -2055,6 +2155,7 @@  (define_expand "add<mode>3"
   [(set (match_operand:SVE_F 0 "register_operand")
 	(unspec:SVE_F
 	  [(match_dup 3)
+	   (const_int SVE_RELAXED_GP)
 	   (match_operand:SVE_F 1 "register_operand")
 	   (match_operand:SVE_F 2 "aarch64_sve_float_arith_with_sub_operand")]
 	  UNSPEC_COND_FADD))]
@@ -2064,11 +2165,12 @@  (define_expand "add<mode>3"
   }
 )
 
-;; Floating-point addition predicated with a PTRUE.
+;; Predicated floating-point addition.
 (define_insn_and_split "*add<mode>3"
   [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w")
 	(unspec:SVE_F
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+	   (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, i, Z")
 	   (match_operand:SVE_F 2 "register_operand" "%0, 0, w")
 	   (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, w")]
 	  UNSPEC_COND_FADD))]
@@ -2100,6 +2202,7 @@  (define_expand "sub<mode>3"
   [(set (match_operand:SVE_F 0 "register_operand")
 	(unspec:SVE_F
 	  [(match_dup 3)
+	   (const_int SVE_RELAXED_GP)
 	   (match_operand:SVE_F 1 "aarch64_sve_float_arith_operand")
 	   (match_operand:SVE_F 2 "register_operand")]
 	  UNSPEC_COND_FSUB))]
@@ -2109,11 +2212,12 @@  (define_expand "sub<mode>3"
   }
 )
 
-;; Floating-point subtraction predicated with a PTRUE.
+;; Predicated floating-point subtraction.
 (define_insn_and_split "*sub<mode>3"
   [(set (match_operand:SVE_F 0 "register_operand" "=w, w, w, w")
 	(unspec:SVE_F
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl, Upl")
+	   (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, i, i, Z")
 	   (match_operand:SVE_F 2 "aarch64_sve_float_arith_operand" "0, 0, vsA, w")
 	   (match_operand:SVE_F 3 "aarch64_sve_float_arith_with_sub_operand" "vsA, vsN, 0, w")]
 	  UNSPEC_COND_FSUB))]
@@ -2143,18 +2247,24 @@  (define_insn_and_split "*sub<mode>3"
 ;; -------------------------------------------------------------------------
 
 ;; Predicated floating-point absolute difference.
-(define_insn "*fabd<mode>3"
+(define_insn_and_rewrite "*fabd<mode>3"
   [(set (match_operand:SVE_F 0 "register_operand" "=w")
 	(unspec:SVE_F
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
+	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
 	   (unspec:SVE_F
-	     [(match_dup 1)
+	     [(match_operand 5)
+	      (match_operand:SI 6 "aarch64_sve_gp_strictness")
 	      (match_operand:SVE_F 2 "register_operand" "0")
 	      (match_operand:SVE_F 3 "register_operand" "w")]
 	     UNSPEC_COND_FSUB)]
 	  UNSPEC_COND_FABS))]
-  "TARGET_SVE"
+  "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
   "fabd\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
+  "&& !rtx_equal_p (operands[1], operands[5])"
+  {
+    operands[5] = copy_rtx (operands[1]);
+  }
 )
 
 ;; -------------------------------------------------------------------------
@@ -2169,6 +2279,7 @@  (define_expand "mul<mode>3"
   [(set (match_operand:SVE_F 0 "register_operand")
 	(unspec:SVE_F
 	  [(match_dup 3)
+	   (const_int SVE_RELAXED_GP)
 	   (match_operand:SVE_F 1 "register_operand")
 	   (match_operand:SVE_F 2 "aarch64_sve_float_mul_operand")]
 	  UNSPEC_COND_FMUL))]
@@ -2178,11 +2289,12 @@  (define_expand "mul<mode>3"
   }
 )
 
-;; Floating-point multiplication predicated with a PTRUE.
+;; Predicated floating-point multiplication.
 (define_insn_and_split "*mul<mode>3"
   [(set (match_operand:SVE_F 0 "register_operand" "=w, w")
 	(unspec:SVE_F
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+	   (match_operand:SI 4 "aarch64_sve_gp_strictness" "i, Z")
 	   (match_operand:SVE_F 2 "register_operand" "%0, w")
 	   (match_operand:SVE_F 3 "aarch64_sve_float_mul_operand" "vsM, w")]
 	  UNSPEC_COND_FMUL))]
@@ -2212,6 +2324,7 @@  (define_expand "div<mode>3"
   [(set (match_operand:SVE_F 0 "register_operand")
 	(unspec:SVE_F
 	  [(match_dup 3)
+	   (const_int SVE_RELAXED_GP)
 	   (match_operand:SVE_F 1 "register_operand")
 	   (match_operand:SVE_F 2 "register_operand")]
 	  UNSPEC_COND_FDIV))]
@@ -2221,11 +2334,12 @@  (define_expand "div<mode>3"
   }
 )
 
-;; Floating-point division predicated with a PTRUE.
+;; Predicated floating-point division.
 (define_insn "*div<mode>3"
   [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
 	(unspec:SVE_F
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
 	   (match_operand:SVE_F 2 "register_operand" "0, w, w")
 	   (match_operand:SVE_F 3 "register_operand" "w, 0, w")]
 	  UNSPEC_COND_FDIV))]
@@ -2334,6 +2448,7 @@  (define_expand "<optab><mode>3"
   [(set (match_operand:SVE_F 0 "register_operand")
 	(unspec:SVE_F
 	  [(match_dup 3)
+	   (const_int SVE_RELAXED_GP)
 	   (match_operand:SVE_F 1 "register_operand")
 	   (match_operand:SVE_F 2 "register_operand")]
 	  SVE_COND_FP_MAXMIN_PUBLIC))]
@@ -2348,6 +2463,7 @@  (define_expand "<maxmin_uns><mode>3"
   [(set (match_operand:SVE_F 0 "register_operand")
 	(unspec:SVE_F
 	  [(match_dup 3)
+	   (const_int SVE_RELAXED_GP)
 	   (match_operand:SVE_F 1 "register_operand")
 	   (match_operand:SVE_F 2 "register_operand")]
 	  SVE_COND_FP_MAXMIN_PUBLIC))]
@@ -2362,6 +2478,7 @@  (define_insn "*<optab><mode>3"
   [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
 	(unspec:SVE_F
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
+	   (match_operand:SI 4 "aarch64_sve_gp_strictness")
 	   (match_operand:SVE_F 2 "register_operand" "%0, w")
 	   (match_operand:SVE_F 3 "register_operand" "w, w")]
 	  SVE_COND_FP_MAXMIN_PUBLIC))]
@@ -2612,6 +2729,7 @@  (define_expand "<optab><mode>4"
   [(set (match_operand:SVE_F 0 "register_operand")
 	(unspec:SVE_F
 	  [(match_dup 4)
+	   (const_int SVE_RELAXED_GP)
 	   (match_operand:SVE_F 1 "register_operand")
 	   (match_operand:SVE_F 2 "register_operand")
 	   (match_operand:SVE_F 3 "register_operand")]
@@ -2627,6 +2745,7 @@  (define_insn "*<optab><mode>4"
   [(set (match_operand:SVE_F 0 "register_operand" "=w, w, ?&w")
 	(unspec:SVE_F
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
+	   (match_operand:SI 5 "aarch64_sve_gp_strictness")
 	   (match_operand:SVE_F 2 "register_operand" "%w, 0, w")
 	   (match_operand:SVE_F 3 "register_operand" "w, w, w")
 	   (match_operand:SVE_F 4 "register_operand" "0, w, w")]
@@ -2646,6 +2765,7 @@  (define_expand "cond_<optab><mode>"
 	  [(match_operand:<VPRED> 1 "register_operand")
 	   (unspec:SVE_F
 	     [(match_dup 1)
+	      (const_int SVE_STRICT_GP)
 	      (match_operand:SVE_F 2 "register_operand")
 	      (match_operand:SVE_F 3 "register_operand")
 	      (match_operand:SVE_F 4 "register_operand")]
@@ -2662,43 +2782,53 @@  (define_expand "cond_<optab><mode>"
 
 ;; Predicated floating-point ternary operations, merging with the
 ;; first input.
-(define_insn "*cond_<optab><mode>_2"
+(define_insn_and_rewrite "*cond_<optab><mode>_2"
   [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
 	(unspec:SVE_F
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
 	   (unspec:SVE_F
-	     [(match_dup 1)
+	     [(match_operand 5)
+	      (match_operand:SI 6 "aarch64_sve_gp_strictness")
 	      (match_operand:SVE_F 2 "register_operand" "0, w")
 	      (match_operand:SVE_F 3 "register_operand" "w, w")
 	      (match_operand:SVE_F 4 "register_operand" "w, w")]
 	     SVE_COND_FP_TERNARY)
 	   (match_dup 2)]
 	  UNSPEC_SEL))]
-  "TARGET_SVE"
+  "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
   "@
    <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
    movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>"
+  "&& !rtx_equal_p (operands[1], operands[5])"
+  {
+    operands[5] = copy_rtx (operands[1]);
+  }
   [(set_attr "movprfx" "*,yes")]
 )
 
 ;; Predicated floating-point ternary operations, merging with the
 ;; third input.
-(define_insn "*cond_<optab><mode>_4"
+(define_insn_and_rewrite "*cond_<optab><mode>_4"
   [(set (match_operand:SVE_F 0 "register_operand" "=w, ?&w")
 	(unspec:SVE_F
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl")
 	   (unspec:SVE_F
-	     [(match_dup 1)
+	     [(match_operand 5)
+	      (match_operand:SI 6 "aarch64_sve_gp_strictness")
 	      (match_operand:SVE_F 2 "register_operand" "w, w")
 	      (match_operand:SVE_F 3 "register_operand" "w, w")
 	      (match_operand:SVE_F 4 "register_operand" "0, w")]
 	     SVE_COND_FP_TERNARY)
 	   (match_dup 4)]
 	  UNSPEC_SEL))]
-  "TARGET_SVE"
+  "TARGET_SVE && aarch64_sve_pred_dominates_p (&operands[5], operands[1])"
   "@
    <sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
    movprfx\t%0, %4\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>"
+  "&& !rtx_equal_p (operands[1], operands[5])"
+  {
+    operands[5] = copy_rtx (operands[1]);
+  }
   [(set_attr "movprfx" "*,yes")]
 )
 
@@ -2709,7 +2839,8 @@  (define_insn_and_rewrite "*cond_<optab><
 	(unspec:SVE_F
 	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
 	   (unspec:SVE_F
-	     [(match_dup 1)
+	     [(match_operand 6)
+	      (match_operand:SI 7 "aarch64_sve_gp_strictness")
 	      (match_operand:SVE_F 2 "register_operand" "w, w, w")
 	      (match_operand:SVE_F 3 "register_operand" "w, w, w")
 	      (match_operand:SVE_F 4 "register_operand" "w, w, w")]
@@ -2719,18 +2850,26 @@  (define_insn_and_rewrite "*cond_<optab><
   "TARGET_SVE
    && !rtx_equal_p (operands[2], operands[5])
    && !rtx_equal_p (operands[3], operands[5])
-   && !rtx_equal_p (operands[4], operands[5])"
+   && !rtx_equal_p (operands[4], operands[5])
+   && aarch64_sve_pred_dominates_p (&operands[6], operands[1])"
   "@
    movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
    movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
    #"
-  "&& reload_completed
-   && !CONSTANT_P (operands[5])
-   && !rtx_equal_p (operands[0], operands[5])"
+  "&& 1"
   {
-    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
-					     operands[5], operands[1]));
-    operands[5] = operands[4] = operands[0];
+    if (reload_completed
+        && register_operand (operands[5], <MODE>mode)
+        && !rtx_equal_p (operands[0], operands[5]))
+      {
+	emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
+						 operands[5], operands[1]));
+	operands[5] = operands[4] = operands[0];
+      }
+    else if (!rtx_equal_p (operands[1], operands[6]))
+      operands[6] = copy_rtx (operands[1]);
+    else
+      FAIL;
   }
   [(set_attr "movprfx" "yes")]
 )