diff mbox series

[V3] RISC-V: Add RVV comparison autovectorization

Message ID 20230523150446.699745-1-juzhe.zhong@rivai.ai
State New
Headers show
Series [V3] RISC-V: Add RVV comparison autovectorization | expand

Commit Message

juzhe.zhong@rivai.ai May 23, 2023, 3:04 p.m. UTC
From: Juzhe-Zhong <juzhe.zhong@rivai.ai>

This patch enables RVV auto-vectorization, including floating-point
unordered and ordered comparisons.

The testcases are leveraged from Richard.
So include Richard as co-author.

Co-Authored-By: Richard Sandiford <richard.sandiford@arm.com>

gcc/ChangeLog:

        * config/riscv/autovec.md (@vcond_mask_<mode><vm>): New pattern.
        (vec_cmp<mode><vm>): Ditto.
        (vec_cmpu<mode><vm>): Ditto.
        (vcond<V:mode><VI:mode>): Ditto.
        (vcondu<V:mode><VI:mode>): Ditto.
        * config/riscv/riscv-protos.h (enum insn_type): Add new enum.
        (emit_vlmax_merge_insn): New function.
        (emit_vlmax_cmp_insn): Ditto.
        (expand_vec_cmp): Ditto.
        (expand_vec_cmp_float): Ditto.
        (expand_vcond): Ditto.
        * config/riscv/riscv-v.cc (emit_vlmax_merge_insn): Ditto.
        (emit_vlmax_cmp_insn): Ditto.
        (get_cmp_insn_code): Ditto.
        (expand_vec_cmp): Ditto.
        (expand_vec_cmp_float): Ditto.
        (expand_vcond): Ditto.

gcc/testsuite/ChangeLog:

        * gcc.target/riscv/rvv/rvv.exp: Add RVV comparison testcases.
        * gcc.target/riscv/rvv/autovec/cmp/vcond-1.c: New test.
        * gcc.target/riscv/rvv/autovec/cmp/vcond-2.c: New test.
        * gcc.target/riscv/rvv/autovec/cmp/vcond-3.c: New test.
        * gcc.target/riscv/rvv/autovec/cmp/vcond_run-1.c: New test.
        * gcc.target/riscv/rvv/autovec/cmp/vcond_run-2.c: New test.
        * gcc.target/riscv/rvv/autovec/cmp/vcond_run-3.c: New test.

---
 gcc/config/riscv/autovec.md                   | 112 ++++++++
 gcc/config/riscv/riscv-protos.h               |   7 +
 gcc/config/riscv/riscv-v.cc                   | 266 +++++++++++++++++-
 .../riscv/rvv/autovec/cmp/vcond-1.c           | 157 +++++++++++
 .../riscv/rvv/autovec/cmp/vcond-2.c           |  75 +++++
 .../riscv/rvv/autovec/cmp/vcond-3.c           |  13 +
 .../riscv/rvv/autovec/cmp/vcond_run-1.c       |  49 ++++
 .../riscv/rvv/autovec/cmp/vcond_run-2.c       |  76 +++++
 .../riscv/rvv/autovec/cmp/vcond_run-3.c       |   6 +
 gcc/testsuite/gcc.target/riscv/rvv/rvv.exp    |   2 +
 10 files changed, 756 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-3.c

Comments

Richard Biener May 24, 2023, 9:41 a.m. UTC | #1
On Tue, May 23, 2023 at 5:05 PM <juzhe.zhong@rivai.ai> wrote:
>
> From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
>
> This patch enable RVV auto-vectorization including floating-point
> unorder and order comparison.
>
> The testcases are leveraged from Richard.
> So include Richard as co-author.
>
> Co-Authored-By: Richard Sandiford <richard.sandiford@arm.com>
>
> gcc/ChangeLog:
>
>         * config/riscv/autovec.md (@vcond_mask_<mode><vm>): New pattern.
>         (vec_cmp<mode><vm>): Ditto.
>         (vec_cmpu<mode><vm>): Ditto.
>         (vcond<V:mode><VI:mode>): Ditto.
>         (vcondu<V:mode><VI:mode>): Ditto.

Unless the ISA really can do compare and select in a single instruction
I'd advise against adding vcond patterns but instead fully rely on
vec_cmp_ + vcond_mask_ only.

I've not heard of an ISA implementing vcond with a single instruction,
so vcond* should eventually die ...

>         * config/riscv/riscv-protos.h (enum insn_type): Add new enum.
>         (emit_vlmax_merge_insn): New function.
>         (emit_vlmax_cmp_insn): Ditto.
>         (expand_vec_cmp): Ditto.
>         (expand_vec_cmp_float):Ditto.
>         (expand_vcond):Ditto.
>         * config/riscv/riscv-v.cc (emit_vlmax_merge_insn): Ditto.
>         (emit_vlmax_cmp_insn): Ditto.
>         (get_cmp_insn_code): Ditto.
>         (expand_vec_cmp): Ditto.
>         (expand_vec_cmp_float): Ditto.
>         (expand_vcond): Ditto.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/rvv/rvv.exp: Add RVV comparison testcases.
>         * gcc.target/riscv/rvv/autovec/cmp/vcond-1.c: New test.
>         * gcc.target/riscv/rvv/autovec/cmp/vcond-2.c: New test.
>         * gcc.target/riscv/rvv/autovec/cmp/vcond-3.c: New test.
>         * gcc.target/riscv/rvv/autovec/cmp/vcond_run-1.c: New test.
>         * gcc.target/riscv/rvv/autovec/cmp/vcond_run-2.c: New test.
>         * gcc.target/riscv/rvv/autovec/cmp/vcond_run-3.c: New test.
>
> ---
>  gcc/config/riscv/autovec.md                   | 112 ++++++++
>  gcc/config/riscv/riscv-protos.h               |   7 +
>  gcc/config/riscv/riscv-v.cc                   | 266 +++++++++++++++++-
>  .../riscv/rvv/autovec/cmp/vcond-1.c           | 157 +++++++++++
>  .../riscv/rvv/autovec/cmp/vcond-2.c           |  75 +++++
>  .../riscv/rvv/autovec/cmp/vcond-3.c           |  13 +
>  .../riscv/rvv/autovec/cmp/vcond_run-1.c       |  49 ++++
>  .../riscv/rvv/autovec/cmp/vcond_run-2.c       |  76 +++++
>  .../riscv/rvv/autovec/cmp/vcond_run-3.c       |   6 +
>  gcc/testsuite/gcc.target/riscv/rvv/rvv.exp    |   2 +
>  10 files changed, 756 insertions(+), 7 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-2.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-3.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-1.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-2.c
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-3.c
>
> diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
> index 04b4459222a..e0258e8b798 100644
> --- a/gcc/config/riscv/autovec.md
> +++ b/gcc/config/riscv/autovec.md
> @@ -162,3 +162,115 @@
>                                  riscv_vector::RVV_BINOP, operands);
>    DONE;
>  })
> +
> +;; =========================================================================
> +;; == Comparisons and selects
> +;; =========================================================================
> +
> +;; -------------------------------------------------------------------------
> +;; ---- [INT,FP] Select based on masks
> +;; -------------------------------------------------------------------------
> +;; Includes merging patterns for:
> +;; - vmerge.vv
> +;; - vmerge.vx
> +;; - vfmerge.vf
> +;; -------------------------------------------------------------------------
> +
> +(define_expand "@vcond_mask_<mode><vm>"
> +  [(match_operand:V 0 "register_operand")
> +   (match_operand:<VM> 3 "register_operand")
> +   (match_operand:V 1 "nonmemory_operand")
> +   (match_operand:V 2 "register_operand")]
> +  "TARGET_VECTOR"
> +  {
> +    /* The order of vcond_mask is opposite to pred_merge.  */
> +    std::swap (operands[1], operands[2]);
> +    riscv_vector::emit_vlmax_merge_insn (code_for_pred_merge (<MODE>mode),
> +                       riscv_vector::RVV_MERGE_OP, operands);
> +    DONE;
> +  }
> +)
> +
> +;; -------------------------------------------------------------------------
> +;; ---- [INT,FP] Comparisons
> +;; -------------------------------------------------------------------------
> +;; Includes:
> +;; - vms<eq/ne/ltu/lt/leu/le/gtu/gt>.<vv/vx/vi>
> +;; -------------------------------------------------------------------------
> +
> +(define_expand "vec_cmp<mode><vm>"
> +  [(set (match_operand:<VM> 0 "register_operand")
> +       (match_operator:<VM> 1 "comparison_operator"
> +         [(match_operand:VI 2 "register_operand")
> +          (match_operand:VI 3 "register_operand")]))]
> +  "TARGET_VECTOR"
> +  {
> +    riscv_vector::expand_vec_cmp (operands[0], GET_CODE (operands[1]),
> +                                 operands[2], operands[3]);
> +    DONE;
> +  }
> +)
> +
> +(define_expand "vec_cmpu<mode><vm>"
> +  [(set (match_operand:<VM> 0 "register_operand")
> +       (match_operator:<VM> 1 "comparison_operator"
> +         [(match_operand:VI 2 "register_operand")
> +          (match_operand:VI 3 "register_operand")]))]
> +  "TARGET_VECTOR"
> +  {
> +    riscv_vector::expand_vec_cmp (operands[0], GET_CODE (operands[1]),
> +                                 operands[2], operands[3]);
> +    DONE;
> +  }
> +)
> +
> +(define_expand "vec_cmp<mode><vm>"
> +  [(set (match_operand:<VM> 0 "register_operand")
> +       (match_operator:<VM> 1 "comparison_operator"
> +         [(match_operand:VF 2 "register_operand")
> +          (match_operand:VF 3 "register_operand")]))]
> +  "TARGET_VECTOR"
> +  {
> +    riscv_vector::expand_vec_cmp_float (operands[0], GET_CODE (operands[1]),
> +                                       operands[2], operands[3], false);
> +    DONE;
> +  }
> +)
> +
> +;; -------------------------------------------------------------------------
> +;; ---- [INT,FP] Compare and select
> +;; -------------------------------------------------------------------------
> +;; The patterns in this section are synthetic.
> +;; -------------------------------------------------------------------------
> +
> +(define_expand "vcond<V:mode><VI:mode>"
> +  [(set (match_operand:V 0 "register_operand")
> +       (if_then_else:V
> +         (match_operator 3 "comparison_operator"
> +           [(match_operand:VI 4 "register_operand")
> +            (match_operand:VI 5 "register_operand")])
> +         (match_operand:V 1 "register_operand")
> +         (match_operand:V 2 "register_operand")))]
> +  "TARGET_VECTOR && known_eq (GET_MODE_NUNITS (<V:MODE>mode),
> +               GET_MODE_NUNITS (<VI:MODE>mode))"
> +  {
> +    riscv_vector::expand_vcond (operands);
> +    DONE;
> +  }
> +)
> +
> +(define_expand "vcondu<V:mode><VI:mode>"
> +  [(set (match_operand:V 0 "register_operand")
> +       (if_then_else:V
> +         (match_operator 3 "comparison_operator"
> +           [(match_operand:VI 4 "register_operand")
> +            (match_operand:VI 5 "register_operand")])
> +         (match_operand:V 1 "register_operand")
> +         (match_operand:V 2 "register_operand")))]
> +  "TARGET_VECTOR && known_eq (GET_MODE_NUNITS (<V:MODE>mode),
> +               GET_MODE_NUNITS (<VI:MODE>mode))"
> +  {
> +    riscv_vector::expand_vcond (operands);
> +    DONE;
> +  }
> +)
> diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
> index 0ae4656befb..58e55c234cb 100644
> --- a/gcc/config/riscv/riscv-protos.h
> +++ b/gcc/config/riscv/riscv-protos.h
> @@ -137,6 +137,8 @@ enum insn_type
>    RVV_MISC_OP = 1,
>    RVV_UNOP = 2,
>    RVV_BINOP = 3,
> +  RVV_MERGE_OP = 4,
> +  RVV_CMP_OP = 4,
>  };
>  enum vlmul_type
>  {
> @@ -174,6 +176,8 @@ void emit_vlmax_vsetvl (machine_mode, rtx);
>  void emit_hard_vlmax_vsetvl (machine_mode, rtx);
>  void emit_vlmax_insn (unsigned, int, rtx *);
>  void emit_nonvlmax_insn (unsigned, int, rtx *);
> +void emit_vlmax_merge_insn (unsigned, int, rtx *);
> +void emit_vlmax_cmp_insn (unsigned, int, rtx *);
>  enum vlmul_type get_vlmul (machine_mode);
>  unsigned int get_ratio (machine_mode);
>  unsigned int get_nf (machine_mode);
> @@ -204,6 +208,8 @@ bool simm5_p (rtx);
>  bool neg_simm5_p (rtx);
>  #ifdef RTX_CODE
>  bool has_vi_variant_p (rtx_code, rtx);
> +void expand_vec_cmp (rtx, rtx_code, rtx, rtx);
> +bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
>  #endif
>  bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
>                           bool, void (*)(rtx *, rtx));
> @@ -226,6 +232,7 @@ machine_mode preferred_simd_mode (scalar_mode);
>  opt_machine_mode get_mask_mode (machine_mode);
>  void expand_vec_series (rtx, rtx, rtx);
>  void expand_vec_init (rtx, rtx);
> +void expand_vcond (rtx *);
>  /* Rounding mode bitfield for fixed point VXRM.  */
>  enum vxrm_field_enum
>  {
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index 478a052a779..e7c0ec226cf 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -169,7 +169,7 @@ public:
>
>      if (m_needs_avl_p)
>        {
> -       rtx len = ops[m_op_num];
> +       rtx len;
>         if (m_vlmax_p)
>           {
>             if (const_vlmax_p (m_dest_mode))
> @@ -185,6 +185,16 @@ public:
>                 len = gen_reg_rtx (Pmode);
>                 emit_vlmax_vsetvl (m_dest_mode, len);
>               }
> +           else
> +             {
> +               gcc_assert (ops[m_op_num]);
> +               len = ops[m_op_num];
> +             }
> +         }
> +       else
> +         {
> +           gcc_assert (ops[m_op_num]);
> +           len = ops[m_op_num];
>           }
>         add_input_operand (len, Pmode);
>        }
> @@ -341,15 +351,15 @@ autovec_use_vlmax_p (void)
>  void
>  emit_vlmax_insn (unsigned icode, int op_num, rtx *ops)
>  {
> -  machine_mode data_mode = GET_MODE (ops[0]);
> -  machine_mode mask_mode = get_mask_mode (data_mode).require ();
> +  machine_mode dest_mode = GET_MODE (ops[0]);
> +  machine_mode mask_mode = get_mask_mode (dest_mode).require ();
>    /* We have a maximum of 11 operands for RVV instruction patterns according to
>     * vector.md.  */
>    insn_expander<11> e (/*OP_NUM*/ op_num, /*HAS_DEST_P*/ true,
>                        /*FULLY_UNMASKED_P*/ true,
>                        /*USE_REAL_MERGE_P*/ false, /*HAS_AVL_P*/ true,
>                        /*VLMAX_P*/ true,
> -                      /*DEST_MODE*/ data_mode, /*MASK_MODE*/ mask_mode);
> +                      /*DEST_MODE*/ dest_mode, /*MASK_MODE*/ mask_mode);
>    e.set_policy (TAIL_ANY);
>    e.set_policy (MASK_ANY);
>    e.emit_insn ((enum insn_code) icode, ops);
> @@ -360,20 +370,52 @@ emit_vlmax_insn (unsigned icode, int op_num, rtx *ops)
>  void
>  emit_nonvlmax_insn (unsigned icode, int op_num, rtx *ops)
>  {
> -  machine_mode data_mode = GET_MODE (ops[0]);
> -  machine_mode mask_mode = get_mask_mode (data_mode).require ();
> +  machine_mode dest_mode = GET_MODE (ops[0]);
> +  machine_mode mask_mode = get_mask_mode (dest_mode).require ();
>    /* We have a maximum of 11 operands for RVV instruction patterns according to
>     * vector.md.  */
>    insn_expander<11> e (/*OP_NUM*/ op_num, /*HAS_DEST_P*/ true,
>                        /*FULLY_UNMASKED_P*/ true,
>                        /*USE_REAL_MERGE_P*/ false, /*HAS_AVL_P*/ true,
>                        /*VLMAX_P*/ false,
> -                      /*DEST_MODE*/ data_mode, /*MASK_MODE*/ mask_mode);
> +                      /*DEST_MODE*/ dest_mode, /*MASK_MODE*/ mask_mode);
>    e.set_policy (TAIL_ANY);
>    e.set_policy (MASK_ANY);
>    e.emit_insn ((enum insn_code) icode, ops);
>  }
>
> +/* This function emits merge instruction.  */
> +void
> +emit_vlmax_merge_insn (unsigned icode, int op_num, rtx *ops)
> +{
> +  machine_mode dest_mode = GET_MODE (ops[0]);
> +  machine_mode mask_mode = get_mask_mode (dest_mode).require ();
> +  insn_expander<11> e (/*OP_NUM*/ op_num, /*HAS_DEST_P*/ true,
> +                      /*FULLY_UNMASKED_P*/ false,
> +                      /*USE_REAL_MERGE_P*/ false, /*HAS_AVL_P*/ true,
> +                      /*VLMAX_P*/ true,
> +                      dest_mode, mask_mode);
> +  e.set_policy (TAIL_ANY);
> +  e.emit_insn ((enum insn_code) icode, ops);
> +}
> +
> +/* This function emits cmp instruction.  */
> +void
> +emit_vlmax_cmp_insn (unsigned icode, int op_num, rtx *ops)
> +{
> +  machine_mode mode = GET_MODE (ops[0]);
> +  bool fully_unmasked_p = op_num == RVV_CMP_OP ? true : false;
> +  bool use_real_merge_p = op_num == RVV_CMP_OP ? false : true;
> +  insn_expander<11> e (/*OP_NUM*/ op_num, /*HAS_DEST_P*/ true,
> +                      /*FULLY_UNMASKED_P*/ fully_unmasked_p,
> +                      /*USE_REAL_MERGE_P*/ use_real_merge_p,
> +                      /*HAS_AVL_P*/ true,
> +                      /*VLMAX_P*/ true,
> +                      /*DEST_MODE*/ mode, /*MASK_MODE*/ mode);
> +  e.set_policy (op_num == RVV_CMP_OP ? MASK_UNDISTURBED : MASK_ANY);
> +  e.emit_insn ((enum insn_code) icode, ops);
> +}
> +
>  /* Expand series const vector.  */
>
>  void
> @@ -1318,4 +1360,214 @@ expand_vec_init (rtx target, rtx vals)
>    expand_vector_init_insert_elems (target, v, nelts);
>  }
>
> +/* Get insn code for corresponding comparison.  */
> +
> +static insn_code
> +get_cmp_insn_code (rtx_code code, machine_mode mode)
> +{
> +  insn_code icode;
> +  switch (code)
> +    {
> +    case EQ:
> +    case NE:
> +    case LE:
> +    case LEU:
> +    case GT:
> +    case GTU:
> +    case LTGT:
> +      icode = code_for_pred_cmp (mode);
> +      break;
> +    case LT:
> +    case LTU:
> +    case GE:
> +    case GEU:
> +      if (FLOAT_MODE_P (mode))
> +       icode = code_for_pred_cmp (mode);
> +      else
> +       icode = code_for_pred_ltge (mode);
> +      break;
> +    default:
> +      gcc_unreachable ();
> +    }
> +  return icode;
> +}
> +
> +/* Expand an RVV comparison.  */
> +
> +void
> +expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1)
> +{
> +  machine_mode mask_mode = GET_MODE (target);
> +  machine_mode data_mode = GET_MODE (op0);
> +  insn_code icode = get_cmp_insn_code (code, data_mode);
> +
> +  if (code == LTGT)
> +    {
> +      rtx lt = gen_reg_rtx (mask_mode);
> +      rtx gt = gen_reg_rtx (mask_mode);
> +      expand_vec_cmp (lt, LT, op0, op1);
> +      expand_vec_cmp (gt, GT, op0, op1);
> +      icode = code_for_pred (IOR, mask_mode);
> +      rtx ops[3] = {target, lt, gt};
> +      emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, ops);
> +      return;
> +    }
> +
> +  rtx cmp = gen_rtx_fmt_ee (code, mask_mode, op0, op1);
> +  rtx ops[RVV_CMP_OP] = {target, cmp, op0, op1};
> +  emit_vlmax_cmp_insn (icode, RVV_CMP_OP, ops);
> +}
> +
> +void
> +expand_vec_cmp (rtx target, rtx_code code, rtx mask, rtx maskoff, rtx op0,
> +               rtx op1)
> +{
> +  machine_mode mask_mode = GET_MODE (target);
> +  machine_mode data_mode = GET_MODE (op0);
> +  insn_code icode = get_cmp_insn_code (code, data_mode);
> +
> +  if (code == LTGT)
> +    {
> +      rtx lt = gen_reg_rtx (mask_mode);
> +      rtx gt = gen_reg_rtx (mask_mode);
> +      expand_vec_cmp (lt, LT, mask, maskoff, op0, op1);
> +      expand_vec_cmp (gt, GT, mask, maskoff, op0, op1);
> +      icode = code_for_pred (IOR, mask_mode);
> +      rtx ops[RVV_BINOP] = {target, lt, gt};
> +      emit_vlmax_insn (icode, RVV_BINOP, ops);
> +      return;
> +    }
> +
> +  rtx cmp = gen_rtx_fmt_ee (code, mask_mode, op0, op1);
> +  rtx ops[RVV_CMP_OP + 2] = {target, mask, maskoff, cmp, op0, op1};
> +  emit_vlmax_cmp_insn (icode, RVV_CMP_OP + 2, ops);
> +}
> +
> +/* Expand an RVV floating-point comparison:
> +
> +   If CAN_INVERT_P is true, the caller can also handle inverted results;
> +   return true if the result is in fact inverted.  */
> +
> +bool
> +expand_vec_cmp_float (rtx target, rtx_code code, rtx op0, rtx op1,
> +                     bool can_invert_p)
> +{
> +  machine_mode mask_mode = GET_MODE (target);
> +  machine_mode data_mode = GET_MODE (op0);
> +
> +  /* If can_invert_p = true:
> +     It suffices to implement a u>= b as !(a < b) but with the NaNs masked off:
> +
> +       vmfeq.vv    v0, va, va
> +       vmfeq.vv    v1, vb, vb
> +       vmand.mm    v0, v0, v1
> +       vmflt.vv    v0, va, vb, v0.t
> +       vmnot.m     v0, v0
> +
> +     And, if !HONOR_SNANS, then you can remove the vmand.mm by masking the
> +     second vmfeq.vv:
> +
> +       vmfeq.vv    v0, va, va
> +       vmfeq.vv    v0, vb, vb, v0.t
> +       vmflt.vv    v0, va, vb, v0.t
> +       vmnot.m     v0, v0
> +
> +     If can_invert_p = false:
> +
> +       # Example of implementing isgreater()
> +       vmfeq.vv v0, va, va        # Only set where A is not NaN.
> +       vmfeq.vv v1, vb, vb        # Only set where B is not NaN.
> +       vmand.mm v0, v0, v1        # Only set where A and B are ordered,
> +       vmfgt.vv v0, va, vb, v0.t  #  so only set flags on ordered values.
> +  */
> +
> +  rtx eq0 = gen_reg_rtx (mask_mode);
> +  rtx eq1 = gen_reg_rtx (mask_mode);
> +  switch (code)
> +    {
> +    case EQ:
> +    case NE:
> +    case LT:
> +    case LE:
> +    case GT:
> +    case GE:
> +    case LTGT:
> +      /* There is native support for the comparison.  */
> +      expand_vec_cmp (target, code, op0, op1);
> +      return false;
> +    case UNEQ:
> +    case ORDERED:
> +    case UNORDERED:
> +    case UNLT:
> +    case UNLE:
> +    case UNGT:
> +    case UNGE:
> +      /* vmfeq.vv v0, va, va  */
> +      expand_vec_cmp (eq0, EQ, op0, op0);
> +      if (HONOR_SNANS (data_mode))
> +       {
> +         /*
> +            vmfeq.vv    v1, vb, vb
> +            vmand.mm    v0, v0, v1
> +         */
> +         expand_vec_cmp (eq1, EQ, op1, op1);
> +         insn_code icode = code_for_pred (AND, mask_mode);
> +         rtx ops[3] = {eq0, eq0, eq1};
> +         emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, ops);
> +       }
> +      else
> +       {
> +         /* vmfeq.vv    v0, vb, vb, v0.t  */
> +         expand_vec_cmp (eq0, EQ, eq0, eq0, op1, op1);
> +       }
> +      break;
> +    default:
> +      gcc_unreachable ();
> +    }
> +
> +  if (code == ORDERED)
> +    {
> +      emit_move_insn (target, eq0);
> +      return false;
> +    }
> +
> +  /* There is native support for the inverse comparison.  */
> +  code = reverse_condition_maybe_unordered (code);
> +  if (code == ORDERED)
> +    emit_move_insn (target, eq0);
> +  else
> +    expand_vec_cmp (eq0, code, eq0, eq0, op0, op1);
> +
> +  if (can_invert_p)
> +    {
> +      emit_move_insn (target, eq0);
> +      return true;
> +    }
> +  insn_code icode = code_for_pred_not (mask_mode);
> +  rtx ops[RVV_UNOP] = {target, eq0};
> +  emit_vlmax_insn (icode, RVV_UNOP, ops);
> +  return false;
> +}
> +
> +/* Expand an RVV vcond pattern with operands OPS.  DATA_MODE is the mode
> +   of the data being merged and CMP_MODE is the mode of the values being
> +   compared.  */
> +
> +void
> +expand_vcond (rtx *ops)
> +{
> +  machine_mode cmp_mode = GET_MODE (ops[4]);
> +  machine_mode data_mode = GET_MODE (ops[1]);
> +  machine_mode mask_mode = get_mask_mode (cmp_mode).require ();
> +  rtx mask = gen_reg_rtx (mask_mode);
> +  if (FLOAT_MODE_P (cmp_mode))
> +    {
> +      if (expand_vec_cmp_float (mask, GET_CODE (ops[3]), ops[4], ops[5], true))
> +       std::swap (ops[1], ops[2]);
> +    }
> +  else
> +    expand_vec_cmp (mask, GET_CODE (ops[3]), ops[4], ops[5]);
> +  emit_insn (gen_vcond_mask (data_mode, data_mode, ops[0], ops[1], ops[2], mask));
> +}
> +
>  } // namespace riscv_vector
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
> new file mode 100644
> index 00000000000..c882654cb49
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
> @@ -0,0 +1,157 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
> +
> +#include <stdint-gcc.h>
> +
> +#define DEF_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX)       \
> +  void __attribute__ ((noinline, noclone))                     \
> +  vcond_var_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r,  \
> +                                  DATA_TYPE *__restrict__ x,   \
> +                                  DATA_TYPE *__restrict__ y,   \
> +                                  CMP_TYPE *__restrict__ a,    \
> +                                  CMP_TYPE *__restrict__ b,    \
> +                                  int n)                       \
> +  {                                                            \
> +    for (int i = 0; i < n; i++)                                        \
> +      {                                                                \
> +       DATA_TYPE xval = x[i], yval = y[i];                     \
> +       CMP_TYPE aval = a[i], bval = b[i];                      \
> +       r[i] = aval COND bval ? xval : yval;                    \
> +      }                                                                \
> +  }
> +
> +#define DEF_VCOND_IMM(DATA_TYPE, CMP_TYPE, COND, IMM, SUFFIX)  \
> +  void __attribute__ ((noinline, noclone))                     \
> +  vcond_imm_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r,  \
> +                                  DATA_TYPE *__restrict__ x,   \
> +                                  DATA_TYPE *__restrict__ y,   \
> +                                  CMP_TYPE *__restrict__ a,    \
> +                                  int n)                       \
> +  {                                                            \
> +    for (int i = 0; i < n; i++)                                        \
> +      {                                                                \
> +       DATA_TYPE xval = x[i], yval = y[i];                     \
> +       CMP_TYPE aval = a[i];                                   \
> +       r[i] = aval COND (CMP_TYPE) IMM ? xval : yval;          \
> +      }                                                                \
> +  }
> +
> +#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX)      \
> +  T (int8_t, int8_t, COND, SUFFIX)                     \
> +  T (int16_t, int16_t, COND, SUFFIX)                   \
> +  T (int32_t, int32_t, COND, SUFFIX)                   \
> +  T (int64_t, int64_t, COND, SUFFIX)                   \
> +  T (float, int32_t, COND, SUFFIX##_float)             \
> +  T (double, int64_t, COND, SUFFIX##_double)
> +
> +#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX)    \
> +  T (uint8_t, uint8_t, COND, SUFFIX)                   \
> +  T (uint16_t, uint16_t, COND, SUFFIX)                 \
> +  T (uint32_t, uint32_t, COND, SUFFIX)                 \
> +  T (uint64_t, uint64_t, COND, SUFFIX)                 \
> +  T (float, uint32_t, COND, SUFFIX##_float)            \
> +  T (double, uint64_t, COND, SUFFIX##_double)
> +
> +#define TEST_COND_VAR_ALL(T, COND, SUFFIX)     \
> +  TEST_COND_VAR_SIGNED_ALL (T, COND, SUFFIX)   \
> +  TEST_COND_VAR_UNSIGNED_ALL (T, COND, SUFFIX)
> +
> +#define TEST_VAR_ALL(T)                                \
> +  TEST_COND_VAR_ALL (T, >, _gt)                        \
> +  TEST_COND_VAR_ALL (T, <, _lt)                        \
> +  TEST_COND_VAR_ALL (T, >=, _ge)               \
> +  TEST_COND_VAR_ALL (T, <=, _le)               \
> +  TEST_COND_VAR_ALL (T, ==, _eq)               \
> +  TEST_COND_VAR_ALL (T, !=, _ne)
> +
> +#define TEST_COND_IMM_SIGNED_ALL(T, COND, IMM, SUFFIX) \
> +  T (int8_t, int8_t, COND, IMM, SUFFIX)                        \
> +  T (int16_t, int16_t, COND, IMM, SUFFIX)              \
> +  T (int32_t, int32_t, COND, IMM, SUFFIX)              \
> +  T (int64_t, int64_t, COND, IMM, SUFFIX)              \
> +  T (float, int32_t, COND, IMM, SUFFIX##_float)                \
> +  T (double, int64_t, COND, IMM, SUFFIX##_double)
> +
> +#define TEST_COND_IMM_UNSIGNED_ALL(T, COND, IMM, SUFFIX)       \
> +  T (uint8_t, uint8_t, COND, IMM, SUFFIX)                      \
> +  T (uint16_t, uint16_t, COND, IMM, SUFFIX)                    \
> +  T (uint32_t, uint32_t, COND, IMM, SUFFIX)                    \
> +  T (uint64_t, uint64_t, COND, IMM, SUFFIX)                    \
> +  T (float, uint32_t, COND, IMM, SUFFIX##_float)               \
> +  T (double, uint64_t, COND, IMM, SUFFIX##_double)
> +
> +#define TEST_COND_IMM_ALL(T, COND, IMM, SUFFIX)                \
> +  TEST_COND_IMM_SIGNED_ALL (T, COND, IMM, SUFFIX)      \
> +  TEST_COND_IMM_UNSIGNED_ALL (T, COND, IMM, SUFFIX)
> +
> +#define TEST_IMM_ALL(T)                                                        \
> +  /* Expect immediates to make it into the encoding.  */               \
> +  TEST_COND_IMM_ALL (T, >, 5, _gt)                                     \
> +  TEST_COND_IMM_ALL (T, <, 5, _lt)                                     \
> +  TEST_COND_IMM_ALL (T, >=, 5, _ge)                                    \
> +  TEST_COND_IMM_ALL (T, <=, 5, _le)                                    \
> +  TEST_COND_IMM_ALL (T, ==, 5, _eq)                                    \
> +  TEST_COND_IMM_ALL (T, !=, 5, _ne)                                    \
> +                                                                       \
> +  TEST_COND_IMM_SIGNED_ALL (T, >, 15, _gt2)                            \
> +  TEST_COND_IMM_SIGNED_ALL (T, <, 15, _lt2)                            \
> +  TEST_COND_IMM_SIGNED_ALL (T, >=, 15, _ge2)                           \
> +  TEST_COND_IMM_SIGNED_ALL (T, <=, 15, _le2)                           \
> +  TEST_COND_IMM_ALL (T, ==, 15, _eq2)                                  \
> +  TEST_COND_IMM_ALL (T, !=, 15, _ne2)                                  \
> +                                                                       \
> +  TEST_COND_IMM_SIGNED_ALL (T, >, 16, _gt3)                            \
> +  TEST_COND_IMM_SIGNED_ALL (T, <, 16, _lt3)                            \
> +  TEST_COND_IMM_SIGNED_ALL (T, >=, 16, _ge3)                           \
> +  TEST_COND_IMM_SIGNED_ALL (T, <=, 16, _le3)                           \
> +  TEST_COND_IMM_ALL (T, ==, 16, _eq3)                                  \
> +  TEST_COND_IMM_ALL (T, !=, 16, _ne3)                                  \
> +                                                                       \
> +  TEST_COND_IMM_SIGNED_ALL (T, >, -16, _gt4)                           \
> +  TEST_COND_IMM_SIGNED_ALL (T, <, -16, _lt4)                           \
> +  TEST_COND_IMM_SIGNED_ALL (T, >=, -16, _ge4)                          \
> +  TEST_COND_IMM_SIGNED_ALL (T, <=, -16, _le4)                          \
> +  TEST_COND_IMM_ALL (T, ==, -16, _eq4)                                 \
> +  TEST_COND_IMM_ALL (T, !=, -16, _ne4)                                 \
> +                                                                       \
> +  TEST_COND_IMM_SIGNED_ALL (T, >, -17, _gt5)                           \
> +  TEST_COND_IMM_SIGNED_ALL (T, <, -17, _lt5)                           \
> +  TEST_COND_IMM_SIGNED_ALL (T, >=, -17, _ge5)                          \
> +  TEST_COND_IMM_SIGNED_ALL (T, <=, -17, _le5)                          \
> +  TEST_COND_IMM_ALL (T, ==, -17, _eq5)                                 \
> +  TEST_COND_IMM_ALL (T, !=, -17, _ne5)                                 \
> +                                                                       \
> +  TEST_COND_IMM_UNSIGNED_ALL (T, >, 0, _gt6)                           \
> +  /* Testing if an unsigned value >= 0 or < 0 is pointless as it will  \
> +     get folded away by the compiler.  */                              \
> +  TEST_COND_IMM_UNSIGNED_ALL (T, <=, 0, _le6)                          \
> +                                                                       \
> +  TEST_COND_IMM_UNSIGNED_ALL (T, >, 127, _gt7)                         \
> +  TEST_COND_IMM_UNSIGNED_ALL (T, <, 127, _lt7)                         \
> +  TEST_COND_IMM_UNSIGNED_ALL (T, >=, 127, _ge7)                                \
> +  TEST_COND_IMM_UNSIGNED_ALL (T, <=, 127, _le7)                                \
> +                                                                       \
> +  /* Expect immediates to NOT make it into the encoding, and instead be \
> +     forced into a register.  */                                       \
> +  TEST_COND_IMM_UNSIGNED_ALL (T, >, 128, _gt8)                         \
> +  TEST_COND_IMM_UNSIGNED_ALL (T, <, 128, _lt8)                         \
> +  TEST_COND_IMM_UNSIGNED_ALL (T, >=, 128, _ge8)                                \
> +  TEST_COND_IMM_UNSIGNED_ALL (T, <=, 128, _le8)
> +
> +TEST_VAR_ALL (DEF_VCOND_VAR)
> +TEST_IMM_ALL (DEF_VCOND_IMM)
> +
> +/* { dg-final { scan-assembler-times {\tvmseq\.vi} 42 } } */
> +/* { dg-final { scan-assembler-times {\tvmsne\.vi} 42 } } */
> +/* { dg-final { scan-assembler-times {\tvmsgt\.vi} 30 } } */
> +/* { dg-final { scan-assembler-times {\tvmsgtu\.vi} 12 } } */
> +/* { dg-final { scan-assembler-times {\tvmslt\.vi} 8 } } */
> +/* { dg-final { scan-assembler-times {\tvmsge\.vi} 8 } } */
> +/* { dg-final { scan-assembler-times {\tvmsle\.vi} 30 } } */
> +/* { dg-final { scan-assembler-times {\tvmsleu\.vi} 12 } } */
> +/* { dg-final { scan-assembler-times {\tvmseq} 78 } } */
> +/* { dg-final { scan-assembler-times {\tvmsne} 78 } } */
> +/* { dg-final { scan-assembler-times {\tvmsgt} 82 } } */
> +/* { dg-final { scan-assembler-times {\tvmslt} 38 } } */
> +/* { dg-final { scan-assembler-times {\tvmsge} 38 } } */
> +/* { dg-final { scan-assembler-times {\tvmsle} 82 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-2.c
> new file mode 100644
> index 00000000000..738f978c5a1
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-2.c
> @@ -0,0 +1,75 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
> +
> +#include <stdint-gcc.h>
> +
> +#define eq(A, B) ((A) == (B))
> +#define ne(A, B) ((A) != (B))
> +#define olt(A, B) ((A) < (B))
> +#define ole(A, B) ((A) <= (B))
> +#define oge(A, B) ((A) >= (B))
> +#define ogt(A, B) ((A) > (B))
> +#define ordered(A, B) (!__builtin_isunordered (A, B))
> +#define unordered(A, B) (__builtin_isunordered (A, B))
> +#define ueq(A, B) (!__builtin_islessgreater (A, B))
> +#define ult(A, B) (__builtin_isless (A, B))
> +#define ule(A, B) (__builtin_islessequal (A, B))
> +#define uge(A, B) (__builtin_isgreaterequal (A, B))
> +#define ugt(A, B) (__builtin_isgreater (A, B))
> +#define nueq(A, B) (__builtin_islessgreater (A, B))
> +#define nult(A, B) (!__builtin_isless (A, B))
> +#define nule(A, B) (!__builtin_islessequal (A, B))
> +#define nuge(A, B) (!__builtin_isgreaterequal (A, B))
> +#define nugt(A, B) (!__builtin_isgreater (A, B))
> +
> +#define TEST_LOOP(TYPE1, TYPE2, CMP)                           \
> +  void __attribute__ ((noinline, noclone))                     \
> +  test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest,  \
> +                                       TYPE1 *restrict src,    \
> +                                       TYPE1 fallback,         \
> +                                       TYPE2 *restrict a,      \
> +                                       TYPE2 *restrict b,      \
> +                                       int count)              \
> +  {                                                            \
> +    for (int i = 0; i < count; ++i)                            \
> +      {\
> +        TYPE2 aval = a[i]; \
> +        TYPE2 bval = b[i]; \
> +        TYPE1 srcval = src[i]; \
> +        dest[i] = CMP (aval, bval) ? srcval : fallback;                \
> +      }\
> +  }
> +
> +#define TEST_CMP(CMP) \
> +  TEST_LOOP (int32_t, float, CMP) \
> +  TEST_LOOP (uint32_t, float, CMP) \
> +  TEST_LOOP (float, float, CMP) \
> +  TEST_LOOP (int64_t, double, CMP) \
> +  TEST_LOOP (uint64_t, double, CMP) \
> +  TEST_LOOP (double, double, CMP)
> +
> +TEST_CMP (eq)
> +TEST_CMP (ne)
> +TEST_CMP (olt)
> +TEST_CMP (ole)
> +TEST_CMP (oge)
> +TEST_CMP (ogt)
> +TEST_CMP (ordered)
> +TEST_CMP (unordered)
> +TEST_CMP (ueq)
> +TEST_CMP (ult)
> +TEST_CMP (ule)
> +TEST_CMP (uge)
> +TEST_CMP (ugt)
> +TEST_CMP (nueq)
> +TEST_CMP (nult)
> +TEST_CMP (nule)
> +TEST_CMP (nuge)
> +TEST_CMP (nugt)
> +
> +/* { dg-final { scan-assembler-times {\tvmfeq} 150 } } */
> +/* { dg-final { scan-assembler-times {\tvmfne} 6 } } */
> +/* { dg-final { scan-assembler-times {\tvmfgt} 30 } } */
> +/* { dg-final { scan-assembler-times {\tvmflt} 30 } } */
> +/* { dg-final { scan-assembler-times {\tvmfge} 18 } } */
> +/* { dg-final { scan-assembler-times {\tvmfle} 18 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-3.c
> new file mode 100644
> index 00000000000..53384829e64
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-3.c
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-trapping-math" } */
> +
> +/* The difference here is that nueq can use LTGT.  */
> +
> +#include "vcond-2.c"
> +
> +/* { dg-final { scan-assembler-times {\tvmfeq} 90 } } */
> +/* { dg-final { scan-assembler-times {\tvmfne} 6 } } */
> +/* { dg-final { scan-assembler-times {\tvmfgt} 30 } } */
> +/* { dg-final { scan-assembler-times {\tvmflt} 30 } } */
> +/* { dg-final { scan-assembler-times {\tvmfge} 18 } } */
> +/* { dg-final { scan-assembler-times {\tvmfle} 18 } } */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-1.c
> new file mode 100644
> index 00000000000..a84d22d2a73
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-1.c
> @@ -0,0 +1,49 @@
> +/* { dg-do run { target { riscv_vector } } } */
> +/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
> +
> +#include "vcond-1.c"
> +
> +#define N 97
> +
> +#define TEST_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX)      \
> +{                                                              \
> +  DATA_TYPE x[N], y[N], r[N];                                  \
> +  CMP_TYPE a[N], b[N];                                         \
> +  for (int i = 0; i < N; ++i)                                  \
> +    {                                                          \
> +      x[i] = i;                                                        \
> +      y[i] = (i & 1) + 5;                                      \
> +      a[i] = i - N / 3;                                                \
> +      b[i] = N - N / 3 - i;                                    \
> +      asm volatile ("" ::: "memory");                          \
> +    }                                                          \
> +  vcond_var_##CMP_TYPE##_##SUFFIX (r, x, y, a, b, N);          \
> +  for (int i = 0; i < N; ++i)                                  \
> +    if (r[i] != (a[i] COND b[i] ? x[i] : y[i]))                        \
> +      __builtin_abort ();                                      \
> +}
> +
> +#define TEST_VCOND_IMM(DATA_TYPE, CMP_TYPE, COND, IMM, SUFFIX) \
> +{                                                              \
> +  DATA_TYPE x[N], y[N], r[N];                                  \
> +  CMP_TYPE a[N];                                               \
> +  for (int i = 0; i < N; ++i)                                  \
> +    {                                                          \
> +      x[i] = i;                                                        \
> +      y[i] = (i & 1) + 5;                                      \
> +      a[i] = IMM - N / 3 + i;                                  \
> +      asm volatile ("" ::: "memory");                          \
> +    }                                                          \
> +  vcond_imm_##CMP_TYPE##_##SUFFIX (r, x, y, a, N);             \
> +  for (int i = 0; i < N; ++i)                                  \
> +    if (r[i] != (a[i] COND (CMP_TYPE) IMM ? x[i] : y[i]))      \
> +      __builtin_abort ();                                      \
> +}
> +
> +int __attribute__ ((optimize (1)))
> +main (int argc, char **argv)
> +{
> +  TEST_VAR_ALL (TEST_VCOND_VAR)
> +  TEST_IMM_ALL (TEST_VCOND_IMM)
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-2.c
> new file mode 100644
> index 00000000000..56fd39f4691
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-2.c
> @@ -0,0 +1,76 @@
> +/* { dg-do run { target { riscv_vector } } } */
> +/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
> +/* { dg-require-effective-target fenv_exceptions } */
> +
> +#include "vcond-2.c"
> +
> +#ifndef TEST_EXCEPTIONS
> +#define TEST_EXCEPTIONS 1
> +#endif
> +
> +#include <fenv.h>
> +
> +#define N 401
> +
> +#define RUN_LOOP(TYPE1, TYPE2, CMP, EXPECT_INVALID)                    \
> +  {                                                                    \
> +    TYPE1 dest[N], src[N];                                             \
> +    TYPE2 a[N], b[N];                                                  \
> +    for (int i = 0; i < N; ++i)                                                \
> +      {                                                                        \
> +       src[i] = i * i;                                                 \
> +       if (i % 5 == 0)                                                 \
> +         a[i] = 0;                                                     \
> +       else if (i % 3)                                                 \
> +         a[i] = i * 0.1;                                               \
> +       else                                                            \
> +         a[i] = i;                                                     \
> +       if (i % 7 == 0)                                                 \
> +         b[i] = __builtin_nan ("");                                    \
> +       else if (i % 6)                                                 \
> +         b[i] = i * 0.1;                                               \
> +       else                                                            \
> +         b[i] = i;                                                     \
> +       asm volatile ("" ::: "memory");                                 \
> +      }                                                                        \
> +    feclearexcept (FE_ALL_EXCEPT);                                     \
> +    test_##TYPE1##_##TYPE2##_##CMP##_var (dest, src, 11, a, b, N);     \
> +    if (TEST_EXCEPTIONS                                                        \
> +       && !fetestexcept (FE_INVALID) != !(EXPECT_INVALID))             \
> +      __builtin_abort ();                                              \
> +    for (int i = 0; i < N; ++i)                                                \
> +      if (dest[i] != (CMP (a[i], b[i]) ? src[i] : 11))                 \
> +       __builtin_abort ();                                             \
> +  }
> +
> +#define RUN_CMP(CMP, EXPECT_INVALID) \
> +  RUN_LOOP (int32_t, float, CMP, EXPECT_INVALID) \
> +  RUN_LOOP (uint32_t, float, CMP, EXPECT_INVALID) \
> +  RUN_LOOP (float, float, CMP, EXPECT_INVALID) \
> +  RUN_LOOP (int64_t, double, CMP, EXPECT_INVALID) \
> +  RUN_LOOP (uint64_t, double, CMP, EXPECT_INVALID) \
> +  RUN_LOOP (double, double, CMP, EXPECT_INVALID)
> +
> +int __attribute__ ((optimize (1)))
> +main (void)
> +{
> +  RUN_CMP (eq, 0)
> +  RUN_CMP (ne, 0)
> +  RUN_CMP (olt, 1)
> +  RUN_CMP (ole, 1)
> +  RUN_CMP (oge, 1)
> +  RUN_CMP (ogt, 1)
> +  RUN_CMP (ordered, 0)
> +  RUN_CMP (unordered, 0)
> +  RUN_CMP (ueq, 0)
> +  RUN_CMP (ult, 0)
> +  RUN_CMP (ule, 0)
> +  RUN_CMP (uge, 0)
> +  RUN_CMP (ugt, 0)
> +  RUN_CMP (nueq, 0)
> +  RUN_CMP (nult, 0)
> +  RUN_CMP (nule, 0)
> +  RUN_CMP (nuge, 0)
> +  RUN_CMP (nugt, 0)
> +  return 0;
> +}
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-3.c
> new file mode 100644
> index 00000000000..e50d561bd98
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-3.c
> @@ -0,0 +1,6 @@
> +/* { dg-do run { target { riscv_vector } } } */
> +/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-trapping-math" } */
> +/* { dg-require-effective-target fenv_exceptions } */
> +
> +#define TEST_EXCEPTIONS 0
> +#include "vcond_run-2.c"
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
> index bc99cc0c3cf..9809a421fc8 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
> @@ -63,6 +63,8 @@ foreach op $AUTOVEC_TEST_OPTS {
>      "" "$op"
>    dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/binop/*.\[cS\]]] \
>      "" "$op"
> +  dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/cmp/*.\[cS\]]] \
> +    "" "$op"
>  }
>
>  # VLS-VLMAX tests
> --
> 2.36.3
>
Richard Sandiford May 24, 2023, 9:57 a.m. UTC | #2
Richard Biener <richard.guenther@gmail.com> writes:
> On Tue, May 23, 2023 at 5:05 PM <juzhe.zhong@rivai.ai> wrote:
>>
>> From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
>>
>> This patch enable RVV auto-vectorization including floating-point
>> unorder and order comparison.
>>
>> The testcases are leveraged from Richard.
>> So include Richard as co-author.
>>
>> Co-Authored-By: Richard Sandiford <richard.sandiford@arm.com>
>>
>> gcc/ChangeLog:
>>
>>         * config/riscv/autovec.md (@vcond_mask_<mode><vm>): New pattern.
>>         (vec_cmp<mode><vm>): Ditto.
>>         (vec_cmpu<mode><vm>): Ditto.
>>         (vcond<V:mode><VI:mode>): Ditto.
>>         (vcondu<V:mode><VI:mode>): Ditto.
>
> Unless the ISA really can do compare and select in a single instruction
> I'd advise against adding vcond patterns but instead fully rely on
> vec_cmp_ + vcond_mask_ only.
>
> I've not heard of an ISA implementing vcond with a single instruction,
> so vcond* should eventually die ...

Yeah.  The SVE code predates modern ISEL.  I've never gone back to check
whether defining the plain vcond patterns is still necessary, or whether
they could just be dropped.

Richard

>>         * config/riscv/riscv-protos.h (enum insn_type): Add new enum.
>>         (emit_vlmax_merge_insn): New function.
>>         (emit_vlmax_cmp_insn): Ditto.
>>         (expand_vec_cmp): Ditto.
>>         (expand_vec_cmp_float):Ditto.
>>         (expand_vcond):Ditto.
>>         * config/riscv/riscv-v.cc (emit_vlmax_merge_insn): Ditto.
>>         (emit_vlmax_cmp_insn): Ditto.
>>         (get_cmp_insn_code): Ditto.
>>         (expand_vec_cmp): Ditto.
>>         (expand_vec_cmp_float): Ditto.
>>         (expand_vcond): Ditto.
>>
>> gcc/testsuite/ChangeLog:
>>
>>         * gcc.target/riscv/rvv/rvv.exp: Add RVV comparison testcases.
>>         * gcc.target/riscv/rvv/autovec/cmp/vcond-1.c: New test.
>>         * gcc.target/riscv/rvv/autovec/cmp/vcond-2.c: New test.
>>         * gcc.target/riscv/rvv/autovec/cmp/vcond-3.c: New test.
>>         * gcc.target/riscv/rvv/autovec/cmp/vcond_run-1.c: New test.
>>         * gcc.target/riscv/rvv/autovec/cmp/vcond_run-2.c: New test.
>>         * gcc.target/riscv/rvv/autovec/cmp/vcond_run-3.c: New test.
>>
>> ---
>>  gcc/config/riscv/autovec.md                   | 112 ++++++++
>>  gcc/config/riscv/riscv-protos.h               |   7 +
>>  gcc/config/riscv/riscv-v.cc                   | 266 +++++++++++++++++-
>>  .../riscv/rvv/autovec/cmp/vcond-1.c           | 157 +++++++++++
>>  .../riscv/rvv/autovec/cmp/vcond-2.c           |  75 +++++
>>  .../riscv/rvv/autovec/cmp/vcond-3.c           |  13 +
>>  .../riscv/rvv/autovec/cmp/vcond_run-1.c       |  49 ++++
>>  .../riscv/rvv/autovec/cmp/vcond_run-2.c       |  76 +++++
>>  .../riscv/rvv/autovec/cmp/vcond_run-3.c       |   6 +
>>  gcc/testsuite/gcc.target/riscv/rvv/rvv.exp    |   2 +
>>  10 files changed, 756 insertions(+), 7 deletions(-)
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-2.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-3.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-1.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-2.c
>>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-3.c
>>
>> diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
>> index 04b4459222a..e0258e8b798 100644
>> --- a/gcc/config/riscv/autovec.md
>> +++ b/gcc/config/riscv/autovec.md
>> @@ -162,3 +162,115 @@
>>                                  riscv_vector::RVV_BINOP, operands);
>>    DONE;
>>  })
>> +
>> +;; =========================================================================
>> +;; == Comparisons and selects
>> +;; =========================================================================
>> +
>> +;; -------------------------------------------------------------------------
>> +;; ---- [INT,FP] Select based on masks
>> +;; -------------------------------------------------------------------------
>> +;; Includes merging patterns for:
>> +;; - vmerge.vv
>> +;; - vmerge.vx
>> +;; - vfmerge.vf
>> +;; -------------------------------------------------------------------------
>> +
>> +(define_expand "@vcond_mask_<mode><vm>"
>> +  [(match_operand:V 0 "register_operand")
>> +   (match_operand:<VM> 3 "register_operand")
>> +   (match_operand:V 1 "nonmemory_operand")
>> +   (match_operand:V 2 "register_operand")]
>> +  "TARGET_VECTOR"
>> +  {
>> +    /* The order of vcond_mask is opposite to pred_merge.  */
>> +    std::swap (operands[1], operands[2]);
>> +    riscv_vector::emit_vlmax_merge_insn (code_for_pred_merge (<MODE>mode),
>> +                       riscv_vector::RVV_MERGE_OP, operands);
>> +    DONE;
>> +  }
>> +)
>> +
>> +;; -------------------------------------------------------------------------
>> +;; ---- [INT,FP] Comparisons
>> +;; -------------------------------------------------------------------------
>> +;; Includes:
>> +;; - vms<eq/ne/ltu/lt/leu/le/gtu/gt>.<vv/vx/vi>
>> +;; -------------------------------------------------------------------------
>> +
>> +(define_expand "vec_cmp<mode><vm>"
>> +  [(set (match_operand:<VM> 0 "register_operand")
>> +       (match_operator:<VM> 1 "comparison_operator"
>> +         [(match_operand:VI 2 "register_operand")
>> +          (match_operand:VI 3 "register_operand")]))]
>> +  "TARGET_VECTOR"
>> +  {
>> +    riscv_vector::expand_vec_cmp (operands[0], GET_CODE (operands[1]),
>> +                                 operands[2], operands[3]);
>> +    DONE;
>> +  }
>> +)
>> +
>> +(define_expand "vec_cmpu<mode><vm>"
>> +  [(set (match_operand:<VM> 0 "register_operand")
>> +       (match_operator:<VM> 1 "comparison_operator"
>> +         [(match_operand:VI 2 "register_operand")
>> +          (match_operand:VI 3 "register_operand")]))]
>> +  "TARGET_VECTOR"
>> +  {
>> +    riscv_vector::expand_vec_cmp (operands[0], GET_CODE (operands[1]),
>> +                                 operands[2], operands[3]);
>> +    DONE;
>> +  }
>> +)
>> +
>> +(define_expand "vec_cmp<mode><vm>"
>> +  [(set (match_operand:<VM> 0 "register_operand")
>> +       (match_operator:<VM> 1 "comparison_operator"
>> +         [(match_operand:VF 2 "register_operand")
>> +          (match_operand:VF 3 "register_operand")]))]
>> +  "TARGET_VECTOR"
>> +  {
>> +    riscv_vector::expand_vec_cmp_float (operands[0], GET_CODE (operands[1]),
>> +                                       operands[2], operands[3], false);
>> +    DONE;
>> +  }
>> +)
>> +
>> +;; -------------------------------------------------------------------------
>> +;; ---- [INT,FP] Compare and select
>> +;; -------------------------------------------------------------------------
>> +;; The patterns in this section are synthetic.
>> +;; -------------------------------------------------------------------------
>> +
>> +(define_expand "vcond<V:mode><VI:mode>"
>> +  [(set (match_operand:V 0 "register_operand")
>> +       (if_then_else:V
>> +         (match_operator 3 "comparison_operator"
>> +           [(match_operand:VI 4 "register_operand")
>> +            (match_operand:VI 5 "register_operand")])
>> +         (match_operand:V 1 "register_operand")
>> +         (match_operand:V 2 "register_operand")))]
>> +  "TARGET_VECTOR && known_eq (GET_MODE_NUNITS (<V:MODE>mode),
>> +               GET_MODE_NUNITS (<VI:MODE>mode))"
>> +  {
>> +    riscv_vector::expand_vcond (operands);
>> +    DONE;
>> +  }
>> +)
>> +
>> +(define_expand "vcondu<V:mode><VI:mode>"
>> +  [(set (match_operand:V 0 "register_operand")
>> +       (if_then_else:V
>> +         (match_operator 3 "comparison_operator"
>> +           [(match_operand:VI 4 "register_operand")
>> +            (match_operand:VI 5 "register_operand")])
>> +         (match_operand:V 1 "register_operand")
>> +         (match_operand:V 2 "register_operand")))]
>> +  "TARGET_VECTOR && known_eq (GET_MODE_NUNITS (<V:MODE>mode),
>> +               GET_MODE_NUNITS (<VI:MODE>mode))"
>> +  {
>> +    riscv_vector::expand_vcond (operands);
>> +    DONE;
>> +  }
>> +)
>> diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
>> index 0ae4656befb..58e55c234cb 100644
>> --- a/gcc/config/riscv/riscv-protos.h
>> +++ b/gcc/config/riscv/riscv-protos.h
>> @@ -137,6 +137,8 @@ enum insn_type
>>    RVV_MISC_OP = 1,
>>    RVV_UNOP = 2,
>>    RVV_BINOP = 3,
>> +  RVV_MERGE_OP = 4,
>> +  RVV_CMP_OP = 4,
>>  };
>>  enum vlmul_type
>>  {
>> @@ -174,6 +176,8 @@ void emit_vlmax_vsetvl (machine_mode, rtx);
>>  void emit_hard_vlmax_vsetvl (machine_mode, rtx);
>>  void emit_vlmax_insn (unsigned, int, rtx *);
>>  void emit_nonvlmax_insn (unsigned, int, rtx *);
>> +void emit_vlmax_merge_insn (unsigned, int, rtx *);
>> +void emit_vlmax_cmp_insn (unsigned, int, rtx *);
>>  enum vlmul_type get_vlmul (machine_mode);
>>  unsigned int get_ratio (machine_mode);
>>  unsigned int get_nf (machine_mode);
>> @@ -204,6 +208,8 @@ bool simm5_p (rtx);
>>  bool neg_simm5_p (rtx);
>>  #ifdef RTX_CODE
>>  bool has_vi_variant_p (rtx_code, rtx);
>> +void expand_vec_cmp (rtx, rtx_code, rtx, rtx);
>> +bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
>>  #endif
>>  bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
>>                           bool, void (*)(rtx *, rtx));
>> @@ -226,6 +232,7 @@ machine_mode preferred_simd_mode (scalar_mode);
>>  opt_machine_mode get_mask_mode (machine_mode);
>>  void expand_vec_series (rtx, rtx, rtx);
>>  void expand_vec_init (rtx, rtx);
>> +void expand_vcond (rtx *);
>>  /* Rounding mode bitfield for fixed point VXRM.  */
>>  enum vxrm_field_enum
>>  {
>> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
>> index 478a052a779..e7c0ec226cf 100644
>> --- a/gcc/config/riscv/riscv-v.cc
>> +++ b/gcc/config/riscv/riscv-v.cc
>> @@ -169,7 +169,7 @@ public:
>>
>>      if (m_needs_avl_p)
>>        {
>> -       rtx len = ops[m_op_num];
>> +       rtx len;
>>         if (m_vlmax_p)
>>           {
>>             if (const_vlmax_p (m_dest_mode))
>> @@ -185,6 +185,16 @@ public:
>>                 len = gen_reg_rtx (Pmode);
>>                 emit_vlmax_vsetvl (m_dest_mode, len);
>>               }
>> +           else
>> +             {
>> +               gcc_assert (ops[m_op_num]);
>> +               len = ops[m_op_num];
>> +             }
>> +         }
>> +       else
>> +         {
>> +           gcc_assert (ops[m_op_num]);
>> +           len = ops[m_op_num];
>>           }
>>         add_input_operand (len, Pmode);
>>        }
>> @@ -341,15 +351,15 @@ autovec_use_vlmax_p (void)
>>  void
>>  emit_vlmax_insn (unsigned icode, int op_num, rtx *ops)
>>  {
>> -  machine_mode data_mode = GET_MODE (ops[0]);
>> -  machine_mode mask_mode = get_mask_mode (data_mode).require ();
>> +  machine_mode dest_mode = GET_MODE (ops[0]);
>> +  machine_mode mask_mode = get_mask_mode (dest_mode).require ();
>>    /* We have a maximum of 11 operands for RVV instruction patterns according to
>>     * vector.md.  */
>>    insn_expander<11> e (/*OP_NUM*/ op_num, /*HAS_DEST_P*/ true,
>>                        /*FULLY_UNMASKED_P*/ true,
>>                        /*USE_REAL_MERGE_P*/ false, /*HAS_AVL_P*/ true,
>>                        /*VLMAX_P*/ true,
>> -                      /*DEST_MODE*/ data_mode, /*MASK_MODE*/ mask_mode);
>> +                      /*DEST_MODE*/ dest_mode, /*MASK_MODE*/ mask_mode);
>>    e.set_policy (TAIL_ANY);
>>    e.set_policy (MASK_ANY);
>>    e.emit_insn ((enum insn_code) icode, ops);
>> @@ -360,20 +370,52 @@ emit_vlmax_insn (unsigned icode, int op_num, rtx *ops)
>>  void
>>  emit_nonvlmax_insn (unsigned icode, int op_num, rtx *ops)
>>  {
>> -  machine_mode data_mode = GET_MODE (ops[0]);
>> -  machine_mode mask_mode = get_mask_mode (data_mode).require ();
>> +  machine_mode dest_mode = GET_MODE (ops[0]);
>> +  machine_mode mask_mode = get_mask_mode (dest_mode).require ();
>>    /* We have a maximum of 11 operands for RVV instruction patterns according to
>>     * vector.md.  */
>>    insn_expander<11> e (/*OP_NUM*/ op_num, /*HAS_DEST_P*/ true,
>>                        /*FULLY_UNMASKED_P*/ true,
>>                        /*USE_REAL_MERGE_P*/ false, /*HAS_AVL_P*/ true,
>>                        /*VLMAX_P*/ false,
>> -                      /*DEST_MODE*/ data_mode, /*MASK_MODE*/ mask_mode);
>> +                      /*DEST_MODE*/ dest_mode, /*MASK_MODE*/ mask_mode);
>>    e.set_policy (TAIL_ANY);
>>    e.set_policy (MASK_ANY);
>>    e.emit_insn ((enum insn_code) icode, ops);
>>  }
>>
>> +/* This function emits merge instruction.  */
>> +void
>> +emit_vlmax_merge_insn (unsigned icode, int op_num, rtx *ops)
>> +{
>> +  machine_mode dest_mode = GET_MODE (ops[0]);
>> +  machine_mode mask_mode = get_mask_mode (dest_mode).require ();
>> +  insn_expander<11> e (/*OP_NUM*/ op_num, /*HAS_DEST_P*/ true,
>> +                      /*FULLY_UNMASKED_P*/ false,
>> +                      /*USE_REAL_MERGE_P*/ false, /*HAS_AVL_P*/ true,
>> +                      /*VLMAX_P*/ true,
>> +                      dest_mode, mask_mode);
>> +  e.set_policy (TAIL_ANY);
>> +  e.emit_insn ((enum insn_code) icode, ops);
>> +}
>> +
>> +/* This function emits cmp instruction.  */
>> +void
>> +emit_vlmax_cmp_insn (unsigned icode, int op_num, rtx *ops)
>> +{
>> +  machine_mode mode = GET_MODE (ops[0]);
>> +  bool fully_unmasked_p = op_num == RVV_CMP_OP ? true : false;
>> +  bool use_real_merge_p = op_num == RVV_CMP_OP ? false : true;
>> +  insn_expander<11> e (/*OP_NUM*/ op_num, /*HAS_DEST_P*/ true,
>> +                      /*FULLY_UNMASKED_P*/ fully_unmasked_p,
>> +                      /*USE_REAL_MERGE_P*/ use_real_merge_p,
>> +                      /*HAS_AVL_P*/ true,
>> +                      /*VLMAX_P*/ true,
>> +                      /*DEST_MODE*/ mode, /*MASK_MODE*/ mode);
>> +  e.set_policy (op_num == RVV_CMP_OP ? MASK_UNDISTURBED : MASK_ANY);
>> +  e.emit_insn ((enum insn_code) icode, ops);
>> +}
>> +
>>  /* Expand series const vector.  */
>>
>>  void
>> @@ -1318,4 +1360,214 @@ expand_vec_init (rtx target, rtx vals)
>>    expand_vector_init_insert_elems (target, v, nelts);
>>  }
>>
>> +/* Get insn code for corresponding comparison.  */
>> +
>> +static insn_code
>> +get_cmp_insn_code (rtx_code code, machine_mode mode)
>> +{
>> +  insn_code icode;
>> +  switch (code)
>> +    {
>> +    case EQ:
>> +    case NE:
>> +    case LE:
>> +    case LEU:
>> +    case GT:
>> +    case GTU:
>> +    case LTGT:
>> +      icode = code_for_pred_cmp (mode);
>> +      break;
>> +    case LT:
>> +    case LTU:
>> +    case GE:
>> +    case GEU:
>> +      if (FLOAT_MODE_P (mode))
>> +       icode = code_for_pred_cmp (mode);
>> +      else
>> +       icode = code_for_pred_ltge (mode);
>> +      break;
>> +    default:
>> +      gcc_unreachable ();
>> +    }
>> +  return icode;
>> +}
>> +
>> +/* Expand an RVV comparison.  */
>> +
>> +void
>> +expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1)
>> +{
>> +  machine_mode mask_mode = GET_MODE (target);
>> +  machine_mode data_mode = GET_MODE (op0);
>> +  insn_code icode = get_cmp_insn_code (code, data_mode);
>> +
>> +  if (code == LTGT)
>> +    {
>> +      rtx lt = gen_reg_rtx (mask_mode);
>> +      rtx gt = gen_reg_rtx (mask_mode);
>> +      expand_vec_cmp (lt, LT, op0, op1);
>> +      expand_vec_cmp (gt, GT, op0, op1);
>> +      icode = code_for_pred (IOR, mask_mode);
>> +      rtx ops[3] = {target, lt, gt};
>> +      emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, ops);
>> +      return;
>> +    }
>> +
>> +  rtx cmp = gen_rtx_fmt_ee (code, mask_mode, op0, op1);
>> +  rtx ops[RVV_CMP_OP] = {target, cmp, op0, op1};
>> +  emit_vlmax_cmp_insn (icode, RVV_CMP_OP, ops);
>> +}
>> +
>> +void
>> +expand_vec_cmp (rtx target, rtx_code code, rtx mask, rtx maskoff, rtx op0,
>> +               rtx op1)
>> +{
>> +  machine_mode mask_mode = GET_MODE (target);
>> +  machine_mode data_mode = GET_MODE (op0);
>> +  insn_code icode = get_cmp_insn_code (code, data_mode);
>> +
>> +  if (code == LTGT)
>> +    {
>> +      rtx lt = gen_reg_rtx (mask_mode);
>> +      rtx gt = gen_reg_rtx (mask_mode);
>> +      expand_vec_cmp (lt, LT, mask, maskoff, op0, op1);
>> +      expand_vec_cmp (gt, GT, mask, maskoff, op0, op1);
>> +      icode = code_for_pred (IOR, mask_mode);
>> +      rtx ops[RVV_BINOP] = {target, lt, gt};
>> +      emit_vlmax_insn (icode, RVV_BINOP, ops);
>> +      return;
>> +    }
>> +
>> +  rtx cmp = gen_rtx_fmt_ee (code, mask_mode, op0, op1);
>> +  rtx ops[RVV_CMP_OP + 2] = {target, mask, maskoff, cmp, op0, op1};
>> +  emit_vlmax_cmp_insn (icode, RVV_CMP_OP + 2, ops);
>> +}
>> +
>> +/* Expand an RVV floating-point comparison:
>> +
>> +   If CAN_INVERT_P is true, the caller can also handle inverted results;
>> +   return true if the result is in fact inverted.  */
>> +
>> +bool
>> +expand_vec_cmp_float (rtx target, rtx_code code, rtx op0, rtx op1,
>> +                     bool can_invert_p)
>> +{
>> +  machine_mode mask_mode = GET_MODE (target);
>> +  machine_mode data_mode = GET_MODE (op0);
>> +
>> +  /* If can_invert_p = true:
>> +     It suffices to implement a u>= b as !(a < b) but with the NaNs masked off:
>> +
>> +       vmfeq.vv    v0, va, va
>> +       vmfeq.vv    v1, vb, vb
>> +       vmand.mm    v0, v0, v1
>> +       vmflt.vv    v0, va, vb, v0.t
>> +       vmnot.m     v0, v0
>> +
>> +     And, if !HONOR_SNANS, then you can remove the vmand.mm by masking the
>> +     second vmfeq.vv:
>> +
>> +       vmfeq.vv    v0, va, va
>> +       vmfeq.vv    v0, vb, vb, v0.t
>> +       vmflt.vv    v0, va, vb, v0.t
>> +       vmnot.m     v0, v0
>> +
>> +     If can_invert_p = false:
>> +
>> +       # Example of implementing isgreater()
>> +       vmfeq.vv v0, va, va        # Only set where A is not NaN.
>> +       vmfeq.vv v1, vb, vb        # Only set where B is not NaN.
>> +       vmand.mm v0, v0, v1        # Only set where A and B are ordered,
>> +       vmfgt.vv v0, va, vb, v0.t  #  so only set flags on ordered values.
>> +  */
>> +
>> +  rtx eq0 = gen_reg_rtx (mask_mode);
>> +  rtx eq1 = gen_reg_rtx (mask_mode);
>> +  switch (code)
>> +    {
>> +    case EQ:
>> +    case NE:
>> +    case LT:
>> +    case LE:
>> +    case GT:
>> +    case GE:
>> +    case LTGT:
>> +      /* There is native support for the comparison.  */
>> +      expand_vec_cmp (target, code, op0, op1);
>> +      return false;
>> +    case UNEQ:
>> +    case ORDERED:
>> +    case UNORDERED:
>> +    case UNLT:
>> +    case UNLE:
>> +    case UNGT:
>> +    case UNGE:
>> +      /* vmfeq.vv v0, va, va  */
>> +      expand_vec_cmp (eq0, EQ, op0, op0);
>> +      if (HONOR_SNANS (data_mode))
>> +       {
>> +         /*
>> +            vmfeq.vv    v1, vb, vb
>> +            vmand.mm    v0, v0, v1
>> +         */
>> +         expand_vec_cmp (eq1, EQ, op1, op1);
>> +         insn_code icode = code_for_pred (AND, mask_mode);
>> +         rtx ops[3] = {eq0, eq0, eq1};
>> +         emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, ops);
>> +       }
>> +      else
>> +       {
>> +         /* vmfeq.vv    v0, vb, vb, v0.t  */
>> +         expand_vec_cmp (eq0, EQ, eq0, eq0, op1, op1);
>> +       }
>> +      break;
>> +    default:
>> +      gcc_unreachable ();
>> +    }
>> +
>> +  if (code == ORDERED)
>> +    {
>> +      emit_move_insn (target, eq0);
>> +      return false;
>> +    }
>> +
>> +  /* There is native support for the inverse comparison.  */
>> +  code = reverse_condition_maybe_unordered (code);
>> +  if (code == ORDERED)
>> +    emit_move_insn (target, eq0);
>> +  else
>> +    expand_vec_cmp (eq0, code, eq0, eq0, op0, op1);
>> +
>> +  if (can_invert_p)
>> +    {
>> +      emit_move_insn (target, eq0);
>> +      return true;
>> +    }
>> +  insn_code icode = code_for_pred_not (mask_mode);
>> +  rtx ops[RVV_UNOP] = {target, eq0};
>> +  emit_vlmax_insn (icode, RVV_UNOP, ops);
>> +  return false;
>> +}
>> +
>> +/* Expand an RVV vcond pattern with operands OPS.  DATA_MODE is the mode
>> +   of the data being merged and CMP_MODE is the mode of the values being
>> +   compared.  */
>> +
>> +void
>> +expand_vcond (rtx *ops)
>> +{
>> +  machine_mode cmp_mode = GET_MODE (ops[4]);
>> +  machine_mode data_mode = GET_MODE (ops[1]);
>> +  machine_mode mask_mode = get_mask_mode (cmp_mode).require ();
>> +  rtx mask = gen_reg_rtx (mask_mode);
>> +  if (FLOAT_MODE_P (cmp_mode))
>> +    {
>> +      if (expand_vec_cmp_float (mask, GET_CODE (ops[3]), ops[4], ops[5], true))
>> +       std::swap (ops[1], ops[2]);
>> +    }
>> +  else
>> +    expand_vec_cmp (mask, GET_CODE (ops[3]), ops[4], ops[5]);
>> +  emit_insn (gen_vcond_mask (data_mode, data_mode, ops[0], ops[1], ops[2], mask));
>> +}
>> +
>>  } // namespace riscv_vector
>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
>> new file mode 100644
>> index 00000000000..c882654cb49
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
>> @@ -0,0 +1,157 @@
>> +/* { dg-do compile } */
>> +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
>> +
>> +#include <stdint-gcc.h>
>> +
>> +#define DEF_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX)       \
>> +  void __attribute__ ((noinline, noclone))                     \
>> +  vcond_var_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r,  \
>> +                                  DATA_TYPE *__restrict__ x,   \
>> +                                  DATA_TYPE *__restrict__ y,   \
>> +                                  CMP_TYPE *__restrict__ a,    \
>> +                                  CMP_TYPE *__restrict__ b,    \
>> +                                  int n)                       \
>> +  {                                                            \
>> +    for (int i = 0; i < n; i++)                                        \
>> +      {                                                                \
>> +       DATA_TYPE xval = x[i], yval = y[i];                     \
>> +       CMP_TYPE aval = a[i], bval = b[i];                      \
>> +       r[i] = aval COND bval ? xval : yval;                    \
>> +      }                                                                \
>> +  }
>> +
>> +#define DEF_VCOND_IMM(DATA_TYPE, CMP_TYPE, COND, IMM, SUFFIX)  \
>> +  void __attribute__ ((noinline, noclone))                     \
>> +  vcond_imm_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r,  \
>> +                                  DATA_TYPE *__restrict__ x,   \
>> +                                  DATA_TYPE *__restrict__ y,   \
>> +                                  CMP_TYPE *__restrict__ a,    \
>> +                                  int n)                       \
>> +  {                                                            \
>> +    for (int i = 0; i < n; i++)                                        \
>> +      {                                                                \
>> +       DATA_TYPE xval = x[i], yval = y[i];                     \
>> +       CMP_TYPE aval = a[i];                                   \
>> +       r[i] = aval COND (CMP_TYPE) IMM ? xval : yval;          \
>> +      }                                                                \
>> +  }
>> +
>> +#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX)      \
>> +  T (int8_t, int8_t, COND, SUFFIX)                     \
>> +  T (int16_t, int16_t, COND, SUFFIX)                   \
>> +  T (int32_t, int32_t, COND, SUFFIX)                   \
>> +  T (int64_t, int64_t, COND, SUFFIX)                   \
>> +  T (float, int32_t, COND, SUFFIX##_float)             \
>> +  T (double, int64_t, COND, SUFFIX##_double)
>> +
>> +#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX)    \
>> +  T (uint8_t, uint8_t, COND, SUFFIX)                   \
>> +  T (uint16_t, uint16_t, COND, SUFFIX)                 \
>> +  T (uint32_t, uint32_t, COND, SUFFIX)                 \
>> +  T (uint64_t, uint64_t, COND, SUFFIX)                 \
>> +  T (float, uint32_t, COND, SUFFIX##_float)            \
>> +  T (double, uint64_t, COND, SUFFIX##_double)
>> +
>> +#define TEST_COND_VAR_ALL(T, COND, SUFFIX)     \
>> +  TEST_COND_VAR_SIGNED_ALL (T, COND, SUFFIX)   \
>> +  TEST_COND_VAR_UNSIGNED_ALL (T, COND, SUFFIX)
>> +
>> +#define TEST_VAR_ALL(T)                                \
>> +  TEST_COND_VAR_ALL (T, >, _gt)                        \
>> +  TEST_COND_VAR_ALL (T, <, _lt)                        \
>> +  TEST_COND_VAR_ALL (T, >=, _ge)               \
>> +  TEST_COND_VAR_ALL (T, <=, _le)               \
>> +  TEST_COND_VAR_ALL (T, ==, _eq)               \
>> +  TEST_COND_VAR_ALL (T, !=, _ne)
>> +
>> +#define TEST_COND_IMM_SIGNED_ALL(T, COND, IMM, SUFFIX) \
>> +  T (int8_t, int8_t, COND, IMM, SUFFIX)                        \
>> +  T (int16_t, int16_t, COND, IMM, SUFFIX)              \
>> +  T (int32_t, int32_t, COND, IMM, SUFFIX)              \
>> +  T (int64_t, int64_t, COND, IMM, SUFFIX)              \
>> +  T (float, int32_t, COND, IMM, SUFFIX##_float)                \
>> +  T (double, int64_t, COND, IMM, SUFFIX##_double)
>> +
>> +#define TEST_COND_IMM_UNSIGNED_ALL(T, COND, IMM, SUFFIX)       \
>> +  T (uint8_t, uint8_t, COND, IMM, SUFFIX)                      \
>> +  T (uint16_t, uint16_t, COND, IMM, SUFFIX)                    \
>> +  T (uint32_t, uint32_t, COND, IMM, SUFFIX)                    \
>> +  T (uint64_t, uint64_t, COND, IMM, SUFFIX)                    \
>> +  T (float, uint32_t, COND, IMM, SUFFIX##_float)               \
>> +  T (double, uint64_t, COND, IMM, SUFFIX##_double)
>> +
>> +#define TEST_COND_IMM_ALL(T, COND, IMM, SUFFIX)                \
>> +  TEST_COND_IMM_SIGNED_ALL (T, COND, IMM, SUFFIX)      \
>> +  TEST_COND_IMM_UNSIGNED_ALL (T, COND, IMM, SUFFIX)
>> +
>> +#define TEST_IMM_ALL(T)                                                        \
>> +  /* Expect immediates to make it into the encoding.  */               \
>> +  TEST_COND_IMM_ALL (T, >, 5, _gt)                                     \
>> +  TEST_COND_IMM_ALL (T, <, 5, _lt)                                     \
>> +  TEST_COND_IMM_ALL (T, >=, 5, _ge)                                    \
>> +  TEST_COND_IMM_ALL (T, <=, 5, _le)                                    \
>> +  TEST_COND_IMM_ALL (T, ==, 5, _eq)                                    \
>> +  TEST_COND_IMM_ALL (T, !=, 5, _ne)                                    \
>> +                                                                       \
>> +  TEST_COND_IMM_SIGNED_ALL (T, >, 15, _gt2)                            \
>> +  TEST_COND_IMM_SIGNED_ALL (T, <, 15, _lt2)                            \
>> +  TEST_COND_IMM_SIGNED_ALL (T, >=, 15, _ge2)                           \
>> +  TEST_COND_IMM_SIGNED_ALL (T, <=, 15, _le2)                           \
>> +  TEST_COND_IMM_ALL (T, ==, 15, _eq2)                                  \
>> +  TEST_COND_IMM_ALL (T, !=, 15, _ne2)                                  \
>> +                                                                       \
>> +  TEST_COND_IMM_SIGNED_ALL (T, >, 16, _gt3)                            \
>> +  TEST_COND_IMM_SIGNED_ALL (T, <, 16, _lt3)                            \
>> +  TEST_COND_IMM_SIGNED_ALL (T, >=, 16, _ge3)                           \
>> +  TEST_COND_IMM_SIGNED_ALL (T, <=, 16, _le3)                           \
>> +  TEST_COND_IMM_ALL (T, ==, 16, _eq3)                                  \
>> +  TEST_COND_IMM_ALL (T, !=, 16, _ne3)                                  \
>> +                                                                       \
>> +  TEST_COND_IMM_SIGNED_ALL (T, >, -16, _gt4)                           \
>> +  TEST_COND_IMM_SIGNED_ALL (T, <, -16, _lt4)                           \
>> +  TEST_COND_IMM_SIGNED_ALL (T, >=, -16, _ge4)                          \
>> +  TEST_COND_IMM_SIGNED_ALL (T, <=, -16, _le4)                          \
>> +  TEST_COND_IMM_ALL (T, ==, -16, _eq4)                                 \
>> +  TEST_COND_IMM_ALL (T, !=, -16, _ne4)                                 \
>> +                                                                       \
>> +  TEST_COND_IMM_SIGNED_ALL (T, >, -17, _gt5)                           \
>> +  TEST_COND_IMM_SIGNED_ALL (T, <, -17, _lt5)                           \
>> +  TEST_COND_IMM_SIGNED_ALL (T, >=, -17, _ge5)                          \
>> +  TEST_COND_IMM_SIGNED_ALL (T, <=, -17, _le5)                          \
>> +  TEST_COND_IMM_ALL (T, ==, -17, _eq5)                                 \
>> +  TEST_COND_IMM_ALL (T, !=, -17, _ne5)                                 \
>> +                                                                       \
>> +  TEST_COND_IMM_UNSIGNED_ALL (T, >, 0, _gt6)                           \
>> +  /* Testing if an unsigned value >= 0 or < 0 is pointless as it will  \
>> +     get folded away by the compiler.  */                              \
>> +  TEST_COND_IMM_UNSIGNED_ALL (T, <=, 0, _le6)                          \
>> +                                                                       \
>> +  TEST_COND_IMM_UNSIGNED_ALL (T, >, 127, _gt7)                         \
>> +  TEST_COND_IMM_UNSIGNED_ALL (T, <, 127, _lt7)                         \
>> +  TEST_COND_IMM_UNSIGNED_ALL (T, >=, 127, _ge7)                                \
>> +  TEST_COND_IMM_UNSIGNED_ALL (T, <=, 127, _le7)                                \
>> +                                                                       \
>> +  /* Expect immediates to NOT make it into the encoding, and instead be \
>> +     forced into a register.  */                                       \
>> +  TEST_COND_IMM_UNSIGNED_ALL (T, >, 128, _gt8)                         \
>> +  TEST_COND_IMM_UNSIGNED_ALL (T, <, 128, _lt8)                         \
>> +  TEST_COND_IMM_UNSIGNED_ALL (T, >=, 128, _ge8)                                \
>> +  TEST_COND_IMM_UNSIGNED_ALL (T, <=, 128, _le8)
>> +
>> +TEST_VAR_ALL (DEF_VCOND_VAR)
>> +TEST_IMM_ALL (DEF_VCOND_IMM)
>> +
>> +/* { dg-final { scan-assembler-times {\tvmseq\.vi} 42 } } */
>> +/* { dg-final { scan-assembler-times {\tvmsne\.vi} 42 } } */
>> +/* { dg-final { scan-assembler-times {\tvmsgt\.vi} 30 } } */
>> +/* { dg-final { scan-assembler-times {\tvmsgtu\.vi} 12 } } */
>> +/* { dg-final { scan-assembler-times {\tvmslt\.vi} 8 } } */
>> +/* { dg-final { scan-assembler-times {\tvmsge\.vi} 8 } } */
>> +/* { dg-final { scan-assembler-times {\tvmsle\.vi} 30 } } */
>> +/* { dg-final { scan-assembler-times {\tvmsleu\.vi} 12 } } */
>> +/* { dg-final { scan-assembler-times {\tvmseq} 78 } } */
>> +/* { dg-final { scan-assembler-times {\tvmsne} 78 } } */
>> +/* { dg-final { scan-assembler-times {\tvmsgt} 82 } } */
>> +/* { dg-final { scan-assembler-times {\tvmslt} 38 } } */
>> +/* { dg-final { scan-assembler-times {\tvmsge} 38 } } */
>> +/* { dg-final { scan-assembler-times {\tvmsle} 82 } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-2.c
>> new file mode 100644
>> index 00000000000..738f978c5a1
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-2.c
>> @@ -0,0 +1,75 @@
>> +/* { dg-do compile } */
>> +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
>> +
>> +#include <stdint-gcc.h>
>> +
>> +#define eq(A, B) ((A) == (B))
>> +#define ne(A, B) ((A) != (B))
>> +#define olt(A, B) ((A) < (B))
>> +#define ole(A, B) ((A) <= (B))
>> +#define oge(A, B) ((A) >= (B))
>> +#define ogt(A, B) ((A) > (B))
>> +#define ordered(A, B) (!__builtin_isunordered (A, B))
>> +#define unordered(A, B) (__builtin_isunordered (A, B))
>> +#define ueq(A, B) (!__builtin_islessgreater (A, B))
>> +#define ult(A, B) (__builtin_isless (A, B))
>> +#define ule(A, B) (__builtin_islessequal (A, B))
>> +#define uge(A, B) (__builtin_isgreaterequal (A, B))
>> +#define ugt(A, B) (__builtin_isgreater (A, B))
>> +#define nueq(A, B) (__builtin_islessgreater (A, B))
>> +#define nult(A, B) (!__builtin_isless (A, B))
>> +#define nule(A, B) (!__builtin_islessequal (A, B))
>> +#define nuge(A, B) (!__builtin_isgreaterequal (A, B))
>> +#define nugt(A, B) (!__builtin_isgreater (A, B))
>> +
>> +#define TEST_LOOP(TYPE1, TYPE2, CMP)                           \
>> +  void __attribute__ ((noinline, noclone))                     \
>> +  test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest,  \
>> +                                       TYPE1 *restrict src,    \
>> +                                       TYPE1 fallback,         \
>> +                                       TYPE2 *restrict a,      \
>> +                                       TYPE2 *restrict b,      \
>> +                                       int count)              \
>> +  {                                                            \
>> +    for (int i = 0; i < count; ++i)                            \
>> +      {\
>> +        TYPE2 aval = a[i]; \
>> +        TYPE2 bval = b[i]; \
>> +        TYPE1 srcval = src[i]; \
>> +        dest[i] = CMP (aval, bval) ? srcval : fallback;                \
>> +      }\
>> +  }
>> +
>> +#define TEST_CMP(CMP) \
>> +  TEST_LOOP (int32_t, float, CMP) \
>> +  TEST_LOOP (uint32_t, float, CMP) \
>> +  TEST_LOOP (float, float, CMP) \
>> +  TEST_LOOP (int64_t, double, CMP) \
>> +  TEST_LOOP (uint64_t, double, CMP) \
>> +  TEST_LOOP (double, double, CMP)
>> +
>> +TEST_CMP (eq)
>> +TEST_CMP (ne)
>> +TEST_CMP (olt)
>> +TEST_CMP (ole)
>> +TEST_CMP (oge)
>> +TEST_CMP (ogt)
>> +TEST_CMP (ordered)
>> +TEST_CMP (unordered)
>> +TEST_CMP (ueq)
>> +TEST_CMP (ult)
>> +TEST_CMP (ule)
>> +TEST_CMP (uge)
>> +TEST_CMP (ugt)
>> +TEST_CMP (nueq)
>> +TEST_CMP (nult)
>> +TEST_CMP (nule)
>> +TEST_CMP (nuge)
>> +TEST_CMP (nugt)
>> +
>> +/* { dg-final { scan-assembler-times {\tvmfeq} 150 } } */
>> +/* { dg-final { scan-assembler-times {\tvmfne} 6 } } */
>> +/* { dg-final { scan-assembler-times {\tvmfgt} 30 } } */
>> +/* { dg-final { scan-assembler-times {\tvmflt} 30 } } */
>> +/* { dg-final { scan-assembler-times {\tvmfge} 18 } } */
>> +/* { dg-final { scan-assembler-times {\tvmfle} 18 } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-3.c
>> new file mode 100644
>> index 00000000000..53384829e64
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-3.c
>> @@ -0,0 +1,13 @@
>> +/* { dg-do compile } */
>> +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-trapping-math" } */
>> +
>> +/* The difference here is that nueq can use LTGT.  */
>> +
>> +#include "vcond-2.c"
>> +
>> +/* { dg-final { scan-assembler-times {\tvmfeq} 90 } } */
>> +/* { dg-final { scan-assembler-times {\tvmfne} 6 } } */
>> +/* { dg-final { scan-assembler-times {\tvmfgt} 30 } } */
>> +/* { dg-final { scan-assembler-times {\tvmflt} 30 } } */
>> +/* { dg-final { scan-assembler-times {\tvmfge} 18 } } */
>> +/* { dg-final { scan-assembler-times {\tvmfle} 18 } } */
>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-1.c
>> new file mode 100644
>> index 00000000000..a84d22d2a73
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-1.c
>> @@ -0,0 +1,49 @@
>> +/* { dg-do run { target { riscv_vector } } } */
>> +/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
>> +
>> +#include "vcond-1.c"
>> +
>> +#define N 97
>> +
>> +#define TEST_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX)      \
>> +{                                                              \
>> +  DATA_TYPE x[N], y[N], r[N];                                  \
>> +  CMP_TYPE a[N], b[N];                                         \
>> +  for (int i = 0; i < N; ++i)                                  \
>> +    {                                                          \
>> +      x[i] = i;                                                        \
>> +      y[i] = (i & 1) + 5;                                      \
>> +      a[i] = i - N / 3;                                                \
>> +      b[i] = N - N / 3 - i;                                    \
>> +      asm volatile ("" ::: "memory");                          \
>> +    }                                                          \
>> +  vcond_var_##CMP_TYPE##_##SUFFIX (r, x, y, a, b, N);          \
>> +  for (int i = 0; i < N; ++i)                                  \
>> +    if (r[i] != (a[i] COND b[i] ? x[i] : y[i]))                        \
>> +      __builtin_abort ();                                      \
>> +}
>> +
>> +#define TEST_VCOND_IMM(DATA_TYPE, CMP_TYPE, COND, IMM, SUFFIX) \
>> +{                                                              \
>> +  DATA_TYPE x[N], y[N], r[N];                                  \
>> +  CMP_TYPE a[N];                                               \
>> +  for (int i = 0; i < N; ++i)                                  \
>> +    {                                                          \
>> +      x[i] = i;                                                        \
>> +      y[i] = (i & 1) + 5;                                      \
>> +      a[i] = IMM - N / 3 + i;                                  \
>> +      asm volatile ("" ::: "memory");                          \
>> +    }                                                          \
>> +  vcond_imm_##CMP_TYPE##_##SUFFIX (r, x, y, a, N);             \
>> +  for (int i = 0; i < N; ++i)                                  \
>> +    if (r[i] != (a[i] COND (CMP_TYPE) IMM ? x[i] : y[i]))      \
>> +      __builtin_abort ();                                      \
>> +}
>> +
>> +int __attribute__ ((optimize (1)))
>> +main (int argc, char **argv)
>> +{
>> +  TEST_VAR_ALL (TEST_VCOND_VAR)
>> +  TEST_IMM_ALL (TEST_VCOND_IMM)
>> +  return 0;
>> +}
>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-2.c
>> new file mode 100644
>> index 00000000000..56fd39f4691
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-2.c
>> @@ -0,0 +1,76 @@
>> +/* { dg-do run { target { riscv_vector } } } */
>> +/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
>> +/* { dg-require-effective-target fenv_exceptions } */
>> +
>> +#include "vcond-2.c"
>> +
>> +#ifndef TEST_EXCEPTIONS
>> +#define TEST_EXCEPTIONS 1
>> +#endif
>> +
>> +#include <fenv.h>
>> +
>> +#define N 401
>> +
>> +#define RUN_LOOP(TYPE1, TYPE2, CMP, EXPECT_INVALID)                    \
>> +  {                                                                    \
>> +    TYPE1 dest[N], src[N];                                             \
>> +    TYPE2 a[N], b[N];                                                  \
>> +    for (int i = 0; i < N; ++i)                                                \
>> +      {                                                                        \
>> +       src[i] = i * i;                                                 \
>> +       if (i % 5 == 0)                                                 \
>> +         a[i] = 0;                                                     \
>> +       else if (i % 3)                                                 \
>> +         a[i] = i * 0.1;                                               \
>> +       else                                                            \
>> +         a[i] = i;                                                     \
>> +       if (i % 7 == 0)                                                 \
>> +         b[i] = __builtin_nan ("");                                    \
>> +       else if (i % 6)                                                 \
>> +         b[i] = i * 0.1;                                               \
>> +       else                                                            \
>> +         b[i] = i;                                                     \
>> +       asm volatile ("" ::: "memory");                                 \
>> +      }                                                                        \
>> +    feclearexcept (FE_ALL_EXCEPT);                                     \
>> +    test_##TYPE1##_##TYPE2##_##CMP##_var (dest, src, 11, a, b, N);     \
>> +    if (TEST_EXCEPTIONS                                                        \
>> +       && !fetestexcept (FE_INVALID) != !(EXPECT_INVALID))             \
>> +      __builtin_abort ();                                              \
>> +    for (int i = 0; i < N; ++i)                                                \
>> +      if (dest[i] != (CMP (a[i], b[i]) ? src[i] : 11))                 \
>> +       __builtin_abort ();                                             \
>> +  }
>> +
>> +#define RUN_CMP(CMP, EXPECT_INVALID) \
>> +  RUN_LOOP (int32_t, float, CMP, EXPECT_INVALID) \
>> +  RUN_LOOP (uint32_t, float, CMP, EXPECT_INVALID) \
>> +  RUN_LOOP (float, float, CMP, EXPECT_INVALID) \
>> +  RUN_LOOP (int64_t, double, CMP, EXPECT_INVALID) \
>> +  RUN_LOOP (uint64_t, double, CMP, EXPECT_INVALID) \
>> +  RUN_LOOP (double, double, CMP, EXPECT_INVALID)
>> +
>> +int __attribute__ ((optimize (1)))
>> +main (void)
>> +{
>> +  RUN_CMP (eq, 0)
>> +  RUN_CMP (ne, 0)
>> +  RUN_CMP (olt, 1)
>> +  RUN_CMP (ole, 1)
>> +  RUN_CMP (oge, 1)
>> +  RUN_CMP (ogt, 1)
>> +  RUN_CMP (ordered, 0)
>> +  RUN_CMP (unordered, 0)
>> +  RUN_CMP (ueq, 0)
>> +  RUN_CMP (ult, 0)
>> +  RUN_CMP (ule, 0)
>> +  RUN_CMP (uge, 0)
>> +  RUN_CMP (ugt, 0)
>> +  RUN_CMP (nueq, 0)
>> +  RUN_CMP (nult, 0)
>> +  RUN_CMP (nule, 0)
>> +  RUN_CMP (nuge, 0)
>> +  RUN_CMP (nugt, 0)
>> +  return 0;
>> +}
>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-3.c
>> new file mode 100644
>> index 00000000000..e50d561bd98
>> --- /dev/null
>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-3.c
>> @@ -0,0 +1,6 @@
>> +/* { dg-do run { target { riscv_vector } } } */
>> +/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-trapping-math" } */
>> +/* { dg-require-effective-target fenv_exceptions } */
>> +
>> +#define TEST_EXCEPTIONS 0
>> +#include "vcond_run-2.c"
>> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
>> index bc99cc0c3cf..9809a421fc8 100644
>> --- a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
>> +++ b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
>> @@ -63,6 +63,8 @@ foreach op $AUTOVEC_TEST_OPTS {
>>      "" "$op"
>>    dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/binop/*.\[cS\]]] \
>>      "" "$op"
>> +  dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/cmp/*.\[cS\]]] \
>> +    "" "$op"
>>  }
>>
>>  # VLS-VLMAX tests
>> --
>> 2.36.3
>>
Richard Biener May 24, 2023, 11:20 a.m. UTC | #3
On Wed, May 24, 2023 at 11:57 AM Richard Sandiford
<richard.sandiford@arm.com> wrote:
>
> Richard Biener <richard.guenther@gmail.com> writes:
> > On Tue, May 23, 2023 at 5:05 PM <juzhe.zhong@rivai.ai> wrote:
> >>
> >> From: Juzhe-Zhong <juzhe.zhong@rivai.ai>
> >>
> >> This patch enables RVV auto-vectorization, including floating-point
> >> unordered and ordered comparisons.
> >>
> >> The testcases are leveraged from Richard's work,
> >> so Richard is included as a co-author.
> >>
> >> Co-Authored-By: Richard Sandiford <richard.sandiford@arm.com>
> >>
> >> gcc/ChangeLog:
> >>
> >>         * config/riscv/autovec.md (@vcond_mask_<mode><vm>): New pattern.
> >>         (vec_cmp<mode><vm>): Ditto.
> >>         (vec_cmpu<mode><vm>): Ditto.
> >>         (vcond<V:mode><VI:mode>): Ditto.
> >>         (vcondu<V:mode><VI:mode>): Ditto.
> >
> > Unless the ISA really can do compare and select in a single instruction
> > I'd advise against adding vcond patterns but instead fully rely on
> > vec_cmp_ + vcond_mask_ only.
> >
> > I've not heard of an ISA implementing vcond with a single instruction
> > so vcond* should eventually die ...
>
> Yeah.  The SVE code predates modern ISEL.  I've never gone back to check
> whether defining the plain vcond patterns is still necessary, or whether
> they could just be dropped.

Likewise for x86.  I'll note we also have vcondeq in addition to vcond[u].

Richard.

> Richard
>
> >>         * config/riscv/riscv-protos.h (enum insn_type): Add new enum.
> >>         (emit_vlmax_merge_insn): New function.
> >>         (emit_vlmax_cmp_insn): Ditto.
> >>         (expand_vec_cmp): Ditto.
> >>         (expand_vec_cmp_float):Ditto.
> >>         (expand_vcond):Ditto.
> >>         * config/riscv/riscv-v.cc (emit_vlmax_merge_insn): Ditto.
> >>         (emit_vlmax_cmp_insn): Ditto.
> >>         (get_cmp_insn_code): Ditto.
> >>         (expand_vec_cmp): Ditto.
> >>         (expand_vec_cmp_float): Ditto.
> >>         (expand_vcond): Ditto.
> >>
> >> gcc/testsuite/ChangeLog:
> >>
> >>         * gcc.target/riscv/rvv/rvv.exp: Add RVV comparison testcases.
> >>         * gcc.target/riscv/rvv/autovec/cmp/vcond-1.c: New test.
> >>         * gcc.target/riscv/rvv/autovec/cmp/vcond-2.c: New test.
> >>         * gcc.target/riscv/rvv/autovec/cmp/vcond-3.c: New test.
> >>         * gcc.target/riscv/rvv/autovec/cmp/vcond_run-1.c: New test.
> >>         * gcc.target/riscv/rvv/autovec/cmp/vcond_run-2.c: New test.
> >>         * gcc.target/riscv/rvv/autovec/cmp/vcond_run-3.c: New test.
> >>
> >> ---
> >>  gcc/config/riscv/autovec.md                   | 112 ++++++++
> >>  gcc/config/riscv/riscv-protos.h               |   7 +
> >>  gcc/config/riscv/riscv-v.cc                   | 266 +++++++++++++++++-
> >>  .../riscv/rvv/autovec/cmp/vcond-1.c           | 157 +++++++++++
> >>  .../riscv/rvv/autovec/cmp/vcond-2.c           |  75 +++++
> >>  .../riscv/rvv/autovec/cmp/vcond-3.c           |  13 +
> >>  .../riscv/rvv/autovec/cmp/vcond_run-1.c       |  49 ++++
> >>  .../riscv/rvv/autovec/cmp/vcond_run-2.c       |  76 +++++
> >>  .../riscv/rvv/autovec/cmp/vcond_run-3.c       |   6 +
> >>  gcc/testsuite/gcc.target/riscv/rvv/rvv.exp    |   2 +
> >>  10 files changed, 756 insertions(+), 7 deletions(-)
> >>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
> >>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-2.c
> >>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-3.c
> >>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-1.c
> >>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-2.c
> >>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-3.c
> >>
> >> diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
> >> index 04b4459222a..e0258e8b798 100644
> >> --- a/gcc/config/riscv/autovec.md
> >> +++ b/gcc/config/riscv/autovec.md
> >> @@ -162,3 +162,115 @@
> >>                                  riscv_vector::RVV_BINOP, operands);
> >>    DONE;
> >>  })
> >> +
> >> +;; =========================================================================
> >> +;; == Comparisons and selects
> >> +;; =========================================================================
> >> +
> >> +;; -------------------------------------------------------------------------
> >> +;; ---- [INT,FP] Select based on masks
> >> +;; -------------------------------------------------------------------------
> >> +;; Includes merging patterns for:
> >> +;; - vmerge.vv
> >> +;; - vmerge.vx
> >> +;; - vfmerge.vf
> >> +;; -------------------------------------------------------------------------
> >> +
> >> +(define_expand "@vcond_mask_<mode><vm>"
> >> +  [(match_operand:V 0 "register_operand")
> >> +   (match_operand:<VM> 3 "register_operand")
> >> +   (match_operand:V 1 "nonmemory_operand")
> >> +   (match_operand:V 2 "register_operand")]
> >> +  "TARGET_VECTOR"
> >> +  {
> >> +    /* The order of vcond_mask is opposite to pred_merge.  */
> >> +    std::swap (operands[1], operands[2]);
> >> +    riscv_vector::emit_vlmax_merge_insn (code_for_pred_merge (<MODE>mode),
> >> +                       riscv_vector::RVV_MERGE_OP, operands);
> >> +    DONE;
> >> +  }
> >> +)
> >> +
> >> +;; -------------------------------------------------------------------------
> >> +;; ---- [INT,FP] Comparisons
> >> +;; -------------------------------------------------------------------------
> >> +;; Includes:
> >> +;; - vms<eq/ne/ltu/lt/leu/le/gtu/gt>.<vv/vx/vi>
> >> +;; -------------------------------------------------------------------------
> >> +
> >> +(define_expand "vec_cmp<mode><vm>"
> >> +  [(set (match_operand:<VM> 0 "register_operand")
> >> +       (match_operator:<VM> 1 "comparison_operator"
> >> +         [(match_operand:VI 2 "register_operand")
> >> +          (match_operand:VI 3 "register_operand")]))]
> >> +  "TARGET_VECTOR"
> >> +  {
> >> +    riscv_vector::expand_vec_cmp (operands[0], GET_CODE (operands[1]),
> >> +                                 operands[2], operands[3]);
> >> +    DONE;
> >> +  }
> >> +)
> >> +
> >> +(define_expand "vec_cmpu<mode><vm>"
> >> +  [(set (match_operand:<VM> 0 "register_operand")
> >> +       (match_operator:<VM> 1 "comparison_operator"
> >> +         [(match_operand:VI 2 "register_operand")
> >> +          (match_operand:VI 3 "register_operand")]))]
> >> +  "TARGET_VECTOR"
> >> +  {
> >> +    riscv_vector::expand_vec_cmp (operands[0], GET_CODE (operands[1]),
> >> +                                 operands[2], operands[3]);
> >> +    DONE;
> >> +  }
> >> +)
> >> +
> >> +(define_expand "vec_cmp<mode><vm>"
> >> +  [(set (match_operand:<VM> 0 "register_operand")
> >> +       (match_operator:<VM> 1 "comparison_operator"
> >> +         [(match_operand:VF 2 "register_operand")
> >> +          (match_operand:VF 3 "register_operand")]))]
> >> +  "TARGET_VECTOR"
> >> +  {
> >> +    riscv_vector::expand_vec_cmp_float (operands[0], GET_CODE (operands[1]),
> >> +                                       operands[2], operands[3], false);
> >> +    DONE;
> >> +  }
> >> +)
> >> +
> >> +;; -------------------------------------------------------------------------
> >> +;; ---- [INT,FP] Compare and select
> >> +;; -------------------------------------------------------------------------
> >> +;; The patterns in this section are synthetic.
> >> +;; -------------------------------------------------------------------------
> >> +
> >> +(define_expand "vcond<V:mode><VI:mode>"
> >> +  [(set (match_operand:V 0 "register_operand")
> >> +       (if_then_else:V
> >> +         (match_operator 3 "comparison_operator"
> >> +           [(match_operand:VI 4 "register_operand")
> >> +            (match_operand:VI 5 "register_operand")])
> >> +         (match_operand:V 1 "register_operand")
> >> +         (match_operand:V 2 "register_operand")))]
> >> +  "TARGET_VECTOR && known_eq (GET_MODE_NUNITS (<V:MODE>mode),
> >> +               GET_MODE_NUNITS (<VI:MODE>mode))"
> >> +  {
> >> +    riscv_vector::expand_vcond (operands);
> >> +    DONE;
> >> +  }
> >> +)
> >> +
> >> +(define_expand "vcondu<V:mode><VI:mode>"
> >> +  [(set (match_operand:V 0 "register_operand")
> >> +       (if_then_else:V
> >> +         (match_operator 3 "comparison_operator"
> >> +           [(match_operand:VI 4 "register_operand")
> >> +            (match_operand:VI 5 "register_operand")])
> >> +         (match_operand:V 1 "register_operand")
> >> +         (match_operand:V 2 "register_operand")))]
> >> +  "TARGET_VECTOR && known_eq (GET_MODE_NUNITS (<V:MODE>mode),
> >> +               GET_MODE_NUNITS (<VI:MODE>mode))"
> >> +  {
> >> +    riscv_vector::expand_vcond (operands);
> >> +    DONE;
> >> +  }
> >> +)
> >> diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
> >> index 0ae4656befb..58e55c234cb 100644
> >> --- a/gcc/config/riscv/riscv-protos.h
> >> +++ b/gcc/config/riscv/riscv-protos.h
> >> @@ -137,6 +137,8 @@ enum insn_type
> >>    RVV_MISC_OP = 1,
> >>    RVV_UNOP = 2,
> >>    RVV_BINOP = 3,
> >> +  RVV_MERGE_OP = 4,
> >> +  RVV_CMP_OP = 4,
> >>  };
> >>  enum vlmul_type
> >>  {
> >> @@ -174,6 +176,8 @@ void emit_vlmax_vsetvl (machine_mode, rtx);
> >>  void emit_hard_vlmax_vsetvl (machine_mode, rtx);
> >>  void emit_vlmax_insn (unsigned, int, rtx *);
> >>  void emit_nonvlmax_insn (unsigned, int, rtx *);
> >> +void emit_vlmax_merge_insn (unsigned, int, rtx *);
> >> +void emit_vlmax_cmp_insn (unsigned, int, rtx *);
> >>  enum vlmul_type get_vlmul (machine_mode);
> >>  unsigned int get_ratio (machine_mode);
> >>  unsigned int get_nf (machine_mode);
> >> @@ -204,6 +208,8 @@ bool simm5_p (rtx);
> >>  bool neg_simm5_p (rtx);
> >>  #ifdef RTX_CODE
> >>  bool has_vi_variant_p (rtx_code, rtx);
> >> +void expand_vec_cmp (rtx, rtx_code, rtx, rtx);
> >> +bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
> >>  #endif
> >>  bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
> >>                           bool, void (*)(rtx *, rtx));
> >> @@ -226,6 +232,7 @@ machine_mode preferred_simd_mode (scalar_mode);
> >>  opt_machine_mode get_mask_mode (machine_mode);
> >>  void expand_vec_series (rtx, rtx, rtx);
> >>  void expand_vec_init (rtx, rtx);
> >> +void expand_vcond (rtx *);
> >>  /* Rounding mode bitfield for fixed point VXRM.  */
> >>  enum vxrm_field_enum
> >>  {
> >> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> >> index 478a052a779..e7c0ec226cf 100644
> >> --- a/gcc/config/riscv/riscv-v.cc
> >> +++ b/gcc/config/riscv/riscv-v.cc
> >> @@ -169,7 +169,7 @@ public:
> >>
> >>      if (m_needs_avl_p)
> >>        {
> >> -       rtx len = ops[m_op_num];
> >> +       rtx len;
> >>         if (m_vlmax_p)
> >>           {
> >>             if (const_vlmax_p (m_dest_mode))
> >> @@ -185,6 +185,16 @@ public:
> >>                 len = gen_reg_rtx (Pmode);
> >>                 emit_vlmax_vsetvl (m_dest_mode, len);
> >>               }
> >> +           else
> >> +             {
> >> +               gcc_assert (ops[m_op_num]);
> >> +               len = ops[m_op_num];
> >> +             }
> >> +         }
> >> +       else
> >> +         {
> >> +           gcc_assert (ops[m_op_num]);
> >> +           len = ops[m_op_num];
> >>           }
> >>         add_input_operand (len, Pmode);
> >>        }
> >> @@ -341,15 +351,15 @@ autovec_use_vlmax_p (void)
> >>  void
> >>  emit_vlmax_insn (unsigned icode, int op_num, rtx *ops)
> >>  {
> >> -  machine_mode data_mode = GET_MODE (ops[0]);
> >> -  machine_mode mask_mode = get_mask_mode (data_mode).require ();
> >> +  machine_mode dest_mode = GET_MODE (ops[0]);
> >> +  machine_mode mask_mode = get_mask_mode (dest_mode).require ();
> >>    /* We have a maximum of 11 operands for RVV instruction patterns according to
> >>     * vector.md.  */
> >>    insn_expander<11> e (/*OP_NUM*/ op_num, /*HAS_DEST_P*/ true,
> >>                        /*FULLY_UNMASKED_P*/ true,
> >>                        /*USE_REAL_MERGE_P*/ false, /*HAS_AVL_P*/ true,
> >>                        /*VLMAX_P*/ true,
> >> -                      /*DEST_MODE*/ data_mode, /*MASK_MODE*/ mask_mode);
> >> +                      /*DEST_MODE*/ dest_mode, /*MASK_MODE*/ mask_mode);
> >>    e.set_policy (TAIL_ANY);
> >>    e.set_policy (MASK_ANY);
> >>    e.emit_insn ((enum insn_code) icode, ops);
> >> @@ -360,20 +370,52 @@ emit_vlmax_insn (unsigned icode, int op_num, rtx *ops)
> >>  void
> >>  emit_nonvlmax_insn (unsigned icode, int op_num, rtx *ops)
> >>  {
> >> -  machine_mode data_mode = GET_MODE (ops[0]);
> >> -  machine_mode mask_mode = get_mask_mode (data_mode).require ();
> >> +  machine_mode dest_mode = GET_MODE (ops[0]);
> >> +  machine_mode mask_mode = get_mask_mode (dest_mode).require ();
> >>    /* We have a maximum of 11 operands for RVV instruction patterns according to
> >>     * vector.md.  */
> >>    insn_expander<11> e (/*OP_NUM*/ op_num, /*HAS_DEST_P*/ true,
> >>                        /*FULLY_UNMASKED_P*/ true,
> >>                        /*USE_REAL_MERGE_P*/ false, /*HAS_AVL_P*/ true,
> >>                        /*VLMAX_P*/ false,
> >> -                      /*DEST_MODE*/ data_mode, /*MASK_MODE*/ mask_mode);
> >> +                      /*DEST_MODE*/ dest_mode, /*MASK_MODE*/ mask_mode);
> >>    e.set_policy (TAIL_ANY);
> >>    e.set_policy (MASK_ANY);
> >>    e.emit_insn ((enum insn_code) icode, ops);
> >>  }
> >>
> >> +/* This function emits a merge instruction.  */
> >> +void
> >> +emit_vlmax_merge_insn (unsigned icode, int op_num, rtx *ops)
> >> +{
> >> +  machine_mode dest_mode = GET_MODE (ops[0]);
> >> +  machine_mode mask_mode = get_mask_mode (dest_mode).require ();
> >> +  insn_expander<11> e (/*OP_NUM*/ op_num, /*HAS_DEST_P*/ true,
> >> +                      /*FULLY_UNMASKED_P*/ false,
> >> +                      /*USE_REAL_MERGE_P*/ false, /*HAS_AVL_P*/ true,
> >> +                      /*VLMAX_P*/ true,
> >> +                      dest_mode, mask_mode);
> >> +  e.set_policy (TAIL_ANY);
> >> +  e.emit_insn ((enum insn_code) icode, ops);
> >> +}
> >> +
> >> +/* This function emits a comparison instruction.  */
> >> +void
> >> +emit_vlmax_cmp_insn (unsigned icode, int op_num, rtx *ops)
> >> +{
> >> +  machine_mode mode = GET_MODE (ops[0]);
> >> +  bool fully_unmasked_p = op_num == RVV_CMP_OP ? true : false;
> >> +  bool use_real_merge_p = op_num == RVV_CMP_OP ? false : true;
> >> +  insn_expander<11> e (/*OP_NUM*/ op_num, /*HAS_DEST_P*/ true,
> >> +                      /*FULLY_UNMASKED_P*/ fully_unmasked_p,
> >> +                      /*USE_REAL_MERGE_P*/ use_real_merge_p,
> >> +                      /*HAS_AVL_P*/ true,
> >> +                      /*VLMAX_P*/ true,
> >> +                      /*DEST_MODE*/ mode, /*MASK_MODE*/ mode);
> >> +  e.set_policy (op_num == RVV_CMP_OP ? MASK_UNDISTURBED : MASK_ANY);
> >> +  e.emit_insn ((enum insn_code) icode, ops);
> >> +}
> >> +
> >>  /* Expand series const vector.  */
> >>
> >>  void
> >> @@ -1318,4 +1360,214 @@ expand_vec_init (rtx target, rtx vals)
> >>    expand_vector_init_insert_elems (target, v, nelts);
> >>  }
> >>
> >> +/* Get insn code for corresponding comparison.  */
> >> +
> >> +static insn_code
> >> +get_cmp_insn_code (rtx_code code, machine_mode mode)
> >> +{
> >> +  insn_code icode;
> >> +  switch (code)
> >> +    {
> >> +    case EQ:
> >> +    case NE:
> >> +    case LE:
> >> +    case LEU:
> >> +    case GT:
> >> +    case GTU:
> >> +    case LTGT:
> >> +      icode = code_for_pred_cmp (mode);
> >> +      break;
> >> +    case LT:
> >> +    case LTU:
> >> +    case GE:
> >> +    case GEU:
> >> +      if (FLOAT_MODE_P (mode))
> >> +       icode = code_for_pred_cmp (mode);
> >> +      else
> >> +       icode = code_for_pred_ltge (mode);
> >> +      break;
> >> +    default:
> >> +      gcc_unreachable ();
> >> +    }
> >> +  return icode;
> >> +}
> >> +
> >> +/* Expand an RVV comparison.  */
> >> +
> >> +void
> >> +expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1)
> >> +{
> >> +  machine_mode mask_mode = GET_MODE (target);
> >> +  machine_mode data_mode = GET_MODE (op0);
> >> +  insn_code icode = get_cmp_insn_code (code, data_mode);
> >> +
> >> +  if (code == LTGT)
> >> +    {
> >> +      rtx lt = gen_reg_rtx (mask_mode);
> >> +      rtx gt = gen_reg_rtx (mask_mode);
> >> +      expand_vec_cmp (lt, LT, op0, op1);
> >> +      expand_vec_cmp (gt, GT, op0, op1);
> >> +      icode = code_for_pred (IOR, mask_mode);
> >> +      rtx ops[3] = {target, lt, gt};
> >> +      emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, ops);
> >> +      return;
> >> +    }
> >> +
> >> +  rtx cmp = gen_rtx_fmt_ee (code, mask_mode, op0, op1);
> >> +  rtx ops[RVV_CMP_OP] = {target, cmp, op0, op1};
> >> +  emit_vlmax_cmp_insn (icode, RVV_CMP_OP, ops);
> >> +}
> >> +
> >> +void
> >> +expand_vec_cmp (rtx target, rtx_code code, rtx mask, rtx maskoff, rtx op0,
> >> +               rtx op1)
> >> +{
> >> +  machine_mode mask_mode = GET_MODE (target);
> >> +  machine_mode data_mode = GET_MODE (op0);
> >> +  insn_code icode = get_cmp_insn_code (code, data_mode);
> >> +
> >> +  if (code == LTGT)
> >> +    {
> >> +      rtx lt = gen_reg_rtx (mask_mode);
> >> +      rtx gt = gen_reg_rtx (mask_mode);
> >> +      expand_vec_cmp (lt, LT, mask, maskoff, op0, op1);
> >> +      expand_vec_cmp (gt, GT, mask, maskoff, op0, op1);
> >> +      icode = code_for_pred (IOR, mask_mode);
> >> +      rtx ops[RVV_BINOP] = {target, lt, gt};
> >> +      emit_vlmax_insn (icode, RVV_BINOP, ops);
> >> +      return;
> >> +    }
> >> +
> >> +  rtx cmp = gen_rtx_fmt_ee (code, mask_mode, op0, op1);
> >> +  rtx ops[RVV_CMP_OP + 2] = {target, mask, maskoff, cmp, op0, op1};
> >> +  emit_vlmax_cmp_insn (icode, RVV_CMP_OP + 2, ops);
> >> +}
> >> +
> >> +/* Expand an RVV floating-point comparison:
> >> +
> >> +   If CAN_INVERT_P is true, the caller can also handle inverted results;
> >> +   return true if the result is in fact inverted.  */
> >> +
> >> +bool
> >> +expand_vec_cmp_float (rtx target, rtx_code code, rtx op0, rtx op1,
> >> +                     bool can_invert_p)
> >> +{
> >> +  machine_mode mask_mode = GET_MODE (target);
> >> +  machine_mode data_mode = GET_MODE (op0);
> >> +
> >> +  /* If can_invert_p = true:
> >> +     It suffices to implement a u>= b as !(a < b) but with the NaNs masked off:
> >> +
> >> +       vmfeq.vv    v0, va, va
> >> +       vmfeq.vv    v1, vb, vb
> >> +       vmand.mm    v0, v0, v1
> >> +       vmflt.vv    v0, va, vb, v0.t
> >> +       vmnot.m     v0, v0
> >> +
> >> +     And, if !HONOR_SNANS, then you can remove the vmand.mm by masking the
> >> +     second vmfeq.vv:
> >> +
> >> +       vmfeq.vv    v0, va, va
> >> +       vmfeq.vv    v0, vb, vb, v0.t
> >> +       vmflt.vv    v0, va, vb, v0.t
> >> +       vmnot.m     v0, v0
> >> +
> >> +     If can_invert_p = false:
> >> +
> >> +       # Example of implementing isgreater()
> >> +       vmfeq.vv v0, va, va        # Only set where A is not NaN.
> >> +       vmfeq.vv v1, vb, vb        # Only set where B is not NaN.
> >> +       vmand.mm v0, v0, v1        # Only set where A and B are ordered,
> >> +       vmfgt.vv v0, va, vb, v0.t  #  so only set flags on ordered values.
> >> +  */
> >> +
> >> +  rtx eq0 = gen_reg_rtx (mask_mode);
> >> +  rtx eq1 = gen_reg_rtx (mask_mode);
> >> +  switch (code)
> >> +    {
> >> +    case EQ:
> >> +    case NE:
> >> +    case LT:
> >> +    case LE:
> >> +    case GT:
> >> +    case GE:
> >> +    case LTGT:
> >> +      /* There is native support for the comparison.  */
> >> +      expand_vec_cmp (target, code, op0, op1);
> >> +      return false;
> >> +    case UNEQ:
> >> +    case ORDERED:
> >> +    case UNORDERED:
> >> +    case UNLT:
> >> +    case UNLE:
> >> +    case UNGT:
> >> +    case UNGE:
> >> +      /* vmfeq.vv v0, va, va  */
> >> +      expand_vec_cmp (eq0, EQ, op0, op0);
> >> +      if (HONOR_SNANS (data_mode))
> >> +       {
> >> +         /*
> >> +            vmfeq.vv    v1, vb, vb
> >> +            vmand.mm    v0, v0, v1
> >> +         */
> >> +         expand_vec_cmp (eq1, EQ, op1, op1);
> >> +         insn_code icode = code_for_pred (AND, mask_mode);
> >> +         rtx ops[3] = {eq0, eq0, eq1};
> >> +         emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, ops);
> >> +       }
> >> +      else
> >> +       {
> >> +         /* vmfeq.vv    v0, vb, vb, v0.t  */
> >> +         expand_vec_cmp (eq0, EQ, eq0, eq0, op1, op1);
> >> +       }
> >> +      break;
> >> +    default:
> >> +      gcc_unreachable ();
> >> +    }
> >> +
> >> +  if (code == ORDERED)
> >> +    {
> >> +      emit_move_insn (target, eq0);
> >> +      return false;
> >> +    }
> >> +
> >> +  /* There is native support for the inverse comparison.  */
> >> +  code = reverse_condition_maybe_unordered (code);
> >> +  if (code == ORDERED)
> >> +    emit_move_insn (target, eq0);
> >> +  else
> >> +    expand_vec_cmp (eq0, code, eq0, eq0, op0, op1);
> >> +
> >> +  if (can_invert_p)
> >> +    {
> >> +      emit_move_insn (target, eq0);
> >> +      return true;
> >> +    }
> >> +  insn_code icode = code_for_pred_not (mask_mode);
> >> +  rtx ops[RVV_UNOP] = {target, eq0};
> >> +  emit_vlmax_insn (icode, RVV_UNOP, ops);
> >> +  return false;
> >> +}
> >> +
> >> +/* Expand an RVV vcond pattern with operands OPS.  The mode of the data
> >> +   being merged and the mode of the values being compared are derived
> >> +   from the operands themselves.  */
> >> +
> >> +void
> >> +expand_vcond (rtx *ops)
> >> +{
> >> +  machine_mode cmp_mode = GET_MODE (ops[4]);
> >> +  machine_mode data_mode = GET_MODE (ops[1]);
> >> +  machine_mode mask_mode = get_mask_mode (cmp_mode).require ();
> >> +  rtx mask = gen_reg_rtx (mask_mode);
> >> +  if (FLOAT_MODE_P (cmp_mode))
> >> +    {
> >> +      if (expand_vec_cmp_float (mask, GET_CODE (ops[3]), ops[4], ops[5], true))
> >> +       std::swap (ops[1], ops[2]);
> >> +    }
> >> +  else
> >> +    expand_vec_cmp (mask, GET_CODE (ops[3]), ops[4], ops[5]);
> >> +  emit_insn (gen_vcond_mask (data_mode, data_mode, ops[0], ops[1], ops[2], mask));
> >> +}
> >> +
> >>  } // namespace riscv_vector
> >> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
> >> new file mode 100644
> >> index 00000000000..c882654cb49
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
> >> @@ -0,0 +1,157 @@
> >> +/* { dg-do compile } */
> >> +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
> >> +
> >> +#include <stdint-gcc.h>
> >> +
> >> +#define DEF_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX)       \
> >> +  void __attribute__ ((noinline, noclone))                     \
> >> +  vcond_var_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r,  \
> >> +                                  DATA_TYPE *__restrict__ x,   \
> >> +                                  DATA_TYPE *__restrict__ y,   \
> >> +                                  CMP_TYPE *__restrict__ a,    \
> >> +                                  CMP_TYPE *__restrict__ b,    \
> >> +                                  int n)                       \
> >> +  {                                                            \
> >> +    for (int i = 0; i < n; i++)                                        \
> >> +      {                                                                \
> >> +       DATA_TYPE xval = x[i], yval = y[i];                     \
> >> +       CMP_TYPE aval = a[i], bval = b[i];                      \
> >> +       r[i] = aval COND bval ? xval : yval;                    \
> >> +      }                                                                \
> >> +  }
> >> +
> >> +#define DEF_VCOND_IMM(DATA_TYPE, CMP_TYPE, COND, IMM, SUFFIX)  \
> >> +  void __attribute__ ((noinline, noclone))                     \
> >> +  vcond_imm_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r,  \
> >> +                                  DATA_TYPE *__restrict__ x,   \
> >> +                                  DATA_TYPE *__restrict__ y,   \
> >> +                                  CMP_TYPE *__restrict__ a,    \
> >> +                                  int n)                       \
> >> +  {                                                            \
> >> +    for (int i = 0; i < n; i++)                                        \
> >> +      {                                                                \
> >> +       DATA_TYPE xval = x[i], yval = y[i];                     \
> >> +       CMP_TYPE aval = a[i];                                   \
> >> +       r[i] = aval COND (CMP_TYPE) IMM ? xval : yval;          \
> >> +      }                                                                \
> >> +  }
> >> +
> >> +#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX)      \
> >> +  T (int8_t, int8_t, COND, SUFFIX)                     \
> >> +  T (int16_t, int16_t, COND, SUFFIX)                   \
> >> +  T (int32_t, int32_t, COND, SUFFIX)                   \
> >> +  T (int64_t, int64_t, COND, SUFFIX)                   \
> >> +  T (float, int32_t, COND, SUFFIX##_float)             \
> >> +  T (double, int64_t, COND, SUFFIX##_double)
> >> +
> >> +#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX)    \
> >> +  T (uint8_t, uint8_t, COND, SUFFIX)                   \
> >> +  T (uint16_t, uint16_t, COND, SUFFIX)                 \
> >> +  T (uint32_t, uint32_t, COND, SUFFIX)                 \
> >> +  T (uint64_t, uint64_t, COND, SUFFIX)                 \
> >> +  T (float, uint32_t, COND, SUFFIX##_float)            \
> >> +  T (double, uint64_t, COND, SUFFIX##_double)
> >> +
> >> +#define TEST_COND_VAR_ALL(T, COND, SUFFIX)     \
> >> +  TEST_COND_VAR_SIGNED_ALL (T, COND, SUFFIX)   \
> >> +  TEST_COND_VAR_UNSIGNED_ALL (T, COND, SUFFIX)
> >> +
> >> +#define TEST_VAR_ALL(T)                                \
> >> +  TEST_COND_VAR_ALL (T, >, _gt)                        \
> >> +  TEST_COND_VAR_ALL (T, <, _lt)                        \
> >> +  TEST_COND_VAR_ALL (T, >=, _ge)               \
> >> +  TEST_COND_VAR_ALL (T, <=, _le)               \
> >> +  TEST_COND_VAR_ALL (T, ==, _eq)               \
> >> +  TEST_COND_VAR_ALL (T, !=, _ne)
> >> +
> >> +#define TEST_COND_IMM_SIGNED_ALL(T, COND, IMM, SUFFIX) \
> >> +  T (int8_t, int8_t, COND, IMM, SUFFIX)                        \
> >> +  T (int16_t, int16_t, COND, IMM, SUFFIX)              \
> >> +  T (int32_t, int32_t, COND, IMM, SUFFIX)              \
> >> +  T (int64_t, int64_t, COND, IMM, SUFFIX)              \
> >> +  T (float, int32_t, COND, IMM, SUFFIX##_float)                \
> >> +  T (double, int64_t, COND, IMM, SUFFIX##_double)
> >> +
> >> +#define TEST_COND_IMM_UNSIGNED_ALL(T, COND, IMM, SUFFIX)       \
> >> +  T (uint8_t, uint8_t, COND, IMM, SUFFIX)                      \
> >> +  T (uint16_t, uint16_t, COND, IMM, SUFFIX)                    \
> >> +  T (uint32_t, uint32_t, COND, IMM, SUFFIX)                    \
> >> +  T (uint64_t, uint64_t, COND, IMM, SUFFIX)                    \
> >> +  T (float, uint32_t, COND, IMM, SUFFIX##_float)               \
> >> +  T (double, uint64_t, COND, IMM, SUFFIX##_double)
> >> +
> >> +#define TEST_COND_IMM_ALL(T, COND, IMM, SUFFIX)                \
> >> +  TEST_COND_IMM_SIGNED_ALL (T, COND, IMM, SUFFIX)      \
> >> +  TEST_COND_IMM_UNSIGNED_ALL (T, COND, IMM, SUFFIX)
> >> +
> >> +#define TEST_IMM_ALL(T)                                                        \
> >> +  /* Expect immediates to make it into the encoding.  */               \
> >> +  TEST_COND_IMM_ALL (T, >, 5, _gt)                                     \
> >> +  TEST_COND_IMM_ALL (T, <, 5, _lt)                                     \
> >> +  TEST_COND_IMM_ALL (T, >=, 5, _ge)                                    \
> >> +  TEST_COND_IMM_ALL (T, <=, 5, _le)                                    \
> >> +  TEST_COND_IMM_ALL (T, ==, 5, _eq)                                    \
> >> +  TEST_COND_IMM_ALL (T, !=, 5, _ne)                                    \
> >> +                                                                       \
> >> +  TEST_COND_IMM_SIGNED_ALL (T, >, 15, _gt2)                            \
> >> +  TEST_COND_IMM_SIGNED_ALL (T, <, 15, _lt2)                            \
> >> +  TEST_COND_IMM_SIGNED_ALL (T, >=, 15, _ge2)                           \
> >> +  TEST_COND_IMM_SIGNED_ALL (T, <=, 15, _le2)                           \
> >> +  TEST_COND_IMM_ALL (T, ==, 15, _eq2)                                  \
> >> +  TEST_COND_IMM_ALL (T, !=, 15, _ne2)                                  \
> >> +                                                                       \
> >> +  TEST_COND_IMM_SIGNED_ALL (T, >, 16, _gt3)                            \
> >> +  TEST_COND_IMM_SIGNED_ALL (T, <, 16, _lt3)                            \
> >> +  TEST_COND_IMM_SIGNED_ALL (T, >=, 16, _ge3)                           \
> >> +  TEST_COND_IMM_SIGNED_ALL (T, <=, 16, _le3)                           \
> >> +  TEST_COND_IMM_ALL (T, ==, 16, _eq3)                                  \
> >> +  TEST_COND_IMM_ALL (T, !=, 16, _ne3)                                  \
> >> +                                                                       \
> >> +  TEST_COND_IMM_SIGNED_ALL (T, >, -16, _gt4)                           \
> >> +  TEST_COND_IMM_SIGNED_ALL (T, <, -16, _lt4)                           \
> >> +  TEST_COND_IMM_SIGNED_ALL (T, >=, -16, _ge4)                          \
> >> +  TEST_COND_IMM_SIGNED_ALL (T, <=, -16, _le4)                          \
> >> +  TEST_COND_IMM_ALL (T, ==, -16, _eq4)                                 \
> >> +  TEST_COND_IMM_ALL (T, !=, -16, _ne4)                                 \
> >> +                                                                       \
> >> +  TEST_COND_IMM_SIGNED_ALL (T, >, -17, _gt5)                           \
> >> +  TEST_COND_IMM_SIGNED_ALL (T, <, -17, _lt5)                           \
> >> +  TEST_COND_IMM_SIGNED_ALL (T, >=, -17, _ge5)                          \
> >> +  TEST_COND_IMM_SIGNED_ALL (T, <=, -17, _le5)                          \
> >> +  TEST_COND_IMM_ALL (T, ==, -17, _eq5)                                 \
> >> +  TEST_COND_IMM_ALL (T, !=, -17, _ne5)                                 \
> >> +                                                                       \
> >> +  TEST_COND_IMM_UNSIGNED_ALL (T, >, 0, _gt6)                           \
> >> +  /* Testing if an unsigned value >= 0 or < 0 is pointless as it will  \
> >> +     get folded away by the compiler.  */                              \
> >> +  TEST_COND_IMM_UNSIGNED_ALL (T, <=, 0, _le6)                          \
> >> +                                                                       \
> >> +  TEST_COND_IMM_UNSIGNED_ALL (T, >, 127, _gt7)                         \
> >> +  TEST_COND_IMM_UNSIGNED_ALL (T, <, 127, _lt7)                         \
> >> +  TEST_COND_IMM_UNSIGNED_ALL (T, >=, 127, _ge7)                                \
> >> +  TEST_COND_IMM_UNSIGNED_ALL (T, <=, 127, _le7)                                \
> >> +                                                                       \
> >> +  /* Expect immediates to NOT make it into the encoding, and instead be \
> >> +     forced into a register.  */                                       \
> >> +  TEST_COND_IMM_UNSIGNED_ALL (T, >, 128, _gt8)                         \
> >> +  TEST_COND_IMM_UNSIGNED_ALL (T, <, 128, _lt8)                         \
> >> +  TEST_COND_IMM_UNSIGNED_ALL (T, >=, 128, _ge8)                                \
> >> +  TEST_COND_IMM_UNSIGNED_ALL (T, <=, 128, _le8)
> >> +
> >> +TEST_VAR_ALL (DEF_VCOND_VAR)
> >> +TEST_IMM_ALL (DEF_VCOND_IMM)
> >> +
> >> +/* { dg-final { scan-assembler-times {\tvmseq\.vi} 42 } } */
> >> +/* { dg-final { scan-assembler-times {\tvmsne\.vi} 42 } } */
> >> +/* { dg-final { scan-assembler-times {\tvmsgt\.vi} 30 } } */
> >> +/* { dg-final { scan-assembler-times {\tvmsgtu\.vi} 12 } } */
> >> +/* { dg-final { scan-assembler-times {\tvmslt\.vi} 8 } } */
> >> +/* { dg-final { scan-assembler-times {\tvmsge\.vi} 8 } } */
> >> +/* { dg-final { scan-assembler-times {\tvmsle\.vi} 30 } } */
> >> +/* { dg-final { scan-assembler-times {\tvmsleu\.vi} 12 } } */
> >> +/* { dg-final { scan-assembler-times {\tvmseq} 78 } } */
> >> +/* { dg-final { scan-assembler-times {\tvmsne} 78 } } */
> >> +/* { dg-final { scan-assembler-times {\tvmsgt} 82 } } */
> >> +/* { dg-final { scan-assembler-times {\tvmslt} 38 } } */
> >> +/* { dg-final { scan-assembler-times {\tvmsge} 38 } } */
> >> +/* { dg-final { scan-assembler-times {\tvmsle} 82 } } */
> >> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-2.c
> >> new file mode 100644
> >> index 00000000000..738f978c5a1
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-2.c
> >> @@ -0,0 +1,75 @@
> >> +/* { dg-do compile } */
> >> +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
> >> +
> >> +#include <stdint-gcc.h>
> >> +
> >> +#define eq(A, B) ((A) == (B))
> >> +#define ne(A, B) ((A) != (B))
> >> +#define olt(A, B) ((A) < (B))
> >> +#define ole(A, B) ((A) <= (B))
> >> +#define oge(A, B) ((A) >= (B))
> >> +#define ogt(A, B) ((A) > (B))
> >> +#define ordered(A, B) (!__builtin_isunordered (A, B))
> >> +#define unordered(A, B) (__builtin_isunordered (A, B))
> >> +#define ueq(A, B) (!__builtin_islessgreater (A, B))
> >> +#define ult(A, B) (__builtin_isless (A, B))
> >> +#define ule(A, B) (__builtin_islessequal (A, B))
> >> +#define uge(A, B) (__builtin_isgreaterequal (A, B))
> >> +#define ugt(A, B) (__builtin_isgreater (A, B))
> >> +#define nueq(A, B) (__builtin_islessgreater (A, B))
> >> +#define nult(A, B) (!__builtin_isless (A, B))
> >> +#define nule(A, B) (!__builtin_islessequal (A, B))
> >> +#define nuge(A, B) (!__builtin_isgreaterequal (A, B))
> >> +#define nugt(A, B) (!__builtin_isgreater (A, B))
> >> +
> >> +#define TEST_LOOP(TYPE1, TYPE2, CMP)                           \
> >> +  void __attribute__ ((noinline, noclone))                     \
> >> +  test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest,  \
> >> +                                       TYPE1 *restrict src,    \
> >> +                                       TYPE1 fallback,         \
> >> +                                       TYPE2 *restrict a,      \
> >> +                                       TYPE2 *restrict b,      \
> >> +                                       int count)              \
> >> +  {                                                            \
> >> +    for (int i = 0; i < count; ++i)                            \
> >> +      {\
> >> +        TYPE2 aval = a[i]; \
> >> +        TYPE2 bval = b[i]; \
> >> +        TYPE1 srcval = src[i]; \
> >> +        dest[i] = CMP (aval, bval) ? srcval : fallback;                \
> >> +      }\
> >> +  }
> >> +
> >> +#define TEST_CMP(CMP) \
> >> +  TEST_LOOP (int32_t, float, CMP) \
> >> +  TEST_LOOP (uint32_t, float, CMP) \
> >> +  TEST_LOOP (float, float, CMP) \
> >> +  TEST_LOOP (int64_t, double, CMP) \
> >> +  TEST_LOOP (uint64_t, double, CMP) \
> >> +  TEST_LOOP (double, double, CMP)
> >> +
> >> +TEST_CMP (eq)
> >> +TEST_CMP (ne)
> >> +TEST_CMP (olt)
> >> +TEST_CMP (ole)
> >> +TEST_CMP (oge)
> >> +TEST_CMP (ogt)
> >> +TEST_CMP (ordered)
> >> +TEST_CMP (unordered)
> >> +TEST_CMP (ueq)
> >> +TEST_CMP (ult)
> >> +TEST_CMP (ule)
> >> +TEST_CMP (uge)
> >> +TEST_CMP (ugt)
> >> +TEST_CMP (nueq)
> >> +TEST_CMP (nult)
> >> +TEST_CMP (nule)
> >> +TEST_CMP (nuge)
> >> +TEST_CMP (nugt)
> >> +
> >> +/* { dg-final { scan-assembler-times {\tvmfeq} 150 } } */
> >> +/* { dg-final { scan-assembler-times {\tvmfne} 6 } } */
> >> +/* { dg-final { scan-assembler-times {\tvmfgt} 30 } } */
> >> +/* { dg-final { scan-assembler-times {\tvmflt} 30 } } */
> >> +/* { dg-final { scan-assembler-times {\tvmfge} 18 } } */
> >> +/* { dg-final { scan-assembler-times {\tvmfle} 18 } } */
> >> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-3.c
> >> new file mode 100644
> >> index 00000000000..53384829e64
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-3.c
> >> @@ -0,0 +1,13 @@
> >> +/* { dg-do compile } */
> >> +/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-trapping-math" } */
> >> +
> >> +/* The difference here is that nueq can use LTGT.  */
> >> +
> >> +#include "vcond-2.c"
> >> +
> >> +/* { dg-final { scan-assembler-times {\tvmfeq} 90 } } */
> >> +/* { dg-final { scan-assembler-times {\tvmfne} 6 } } */
> >> +/* { dg-final { scan-assembler-times {\tvmfgt} 30 } } */
> >> +/* { dg-final { scan-assembler-times {\tvmflt} 30 } } */
> >> +/* { dg-final { scan-assembler-times {\tvmfge} 18 } } */
> >> +/* { dg-final { scan-assembler-times {\tvmfle} 18 } } */
> >> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-1.c
> >> new file mode 100644
> >> index 00000000000..a84d22d2a73
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-1.c
> >> @@ -0,0 +1,49 @@
> >> +/* { dg-do run { target { riscv_vector } } } */
> >> +/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
> >> +
> >> +#include "vcond-1.c"
> >> +
> >> +#define N 97
> >> +
> >> +#define TEST_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX)      \
> >> +{                                                              \
> >> +  DATA_TYPE x[N], y[N], r[N];                                  \
> >> +  CMP_TYPE a[N], b[N];                                         \
> >> +  for (int i = 0; i < N; ++i)                                  \
> >> +    {                                                          \
> >> +      x[i] = i;                                                        \
> >> +      y[i] = (i & 1) + 5;                                      \
> >> +      a[i] = i - N / 3;                                                \
> >> +      b[i] = N - N / 3 - i;                                    \
> >> +      asm volatile ("" ::: "memory");                          \
> >> +    }                                                          \
> >> +  vcond_var_##CMP_TYPE##_##SUFFIX (r, x, y, a, b, N);          \
> >> +  for (int i = 0; i < N; ++i)                                  \
> >> +    if (r[i] != (a[i] COND b[i] ? x[i] : y[i]))                        \
> >> +      __builtin_abort ();                                      \
> >> +}
> >> +
> >> +#define TEST_VCOND_IMM(DATA_TYPE, CMP_TYPE, COND, IMM, SUFFIX) \
> >> +{                                                              \
> >> +  DATA_TYPE x[N], y[N], r[N];                                  \
> >> +  CMP_TYPE a[N];                                               \
> >> +  for (int i = 0; i < N; ++i)                                  \
> >> +    {                                                          \
> >> +      x[i] = i;                                                        \
> >> +      y[i] = (i & 1) + 5;                                      \
> >> +      a[i] = IMM - N / 3 + i;                                  \
> >> +      asm volatile ("" ::: "memory");                          \
> >> +    }                                                          \
> >> +  vcond_imm_##CMP_TYPE##_##SUFFIX (r, x, y, a, N);             \
> >> +  for (int i = 0; i < N; ++i)                                  \
> >> +    if (r[i] != (a[i] COND (CMP_TYPE) IMM ? x[i] : y[i]))      \
> >> +      __builtin_abort ();                                      \
> >> +}
> >> +
> >> +int __attribute__ ((optimize (1)))
> >> +main (int argc, char **argv)
> >> +{
> >> +  TEST_VAR_ALL (TEST_VCOND_VAR)
> >> +  TEST_IMM_ALL (TEST_VCOND_IMM)
> >> +  return 0;
> >> +}
> >> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-2.c
> >> new file mode 100644
> >> index 00000000000..56fd39f4691
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-2.c
> >> @@ -0,0 +1,76 @@
> >> +/* { dg-do run { target { riscv_vector } } } */
> >> +/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
> >> +/* { dg-require-effective-target fenv_exceptions } */
> >> +
> >> +#include "vcond-2.c"
> >> +
> >> +#ifndef TEST_EXCEPTIONS
> >> +#define TEST_EXCEPTIONS 1
> >> +#endif
> >> +
> >> +#include <fenv.h>
> >> +
> >> +#define N 401
> >> +
> >> +#define RUN_LOOP(TYPE1, TYPE2, CMP, EXPECT_INVALID)                    \
> >> +  {                                                                    \
> >> +    TYPE1 dest[N], src[N];                                             \
> >> +    TYPE2 a[N], b[N];                                                  \
> >> +    for (int i = 0; i < N; ++i)                                                \
> >> +      {                                                                        \
> >> +       src[i] = i * i;                                                 \
> >> +       if (i % 5 == 0)                                                 \
> >> +         a[i] = 0;                                                     \
> >> +       else if (i % 3)                                                 \
> >> +         a[i] = i * 0.1;                                               \
> >> +       else                                                            \
> >> +         a[i] = i;                                                     \
> >> +       if (i % 7 == 0)                                                 \
> >> +         b[i] = __builtin_nan ("");                                    \
> >> +       else if (i % 6)                                                 \
> >> +         b[i] = i * 0.1;                                               \
> >> +       else                                                            \
> >> +         b[i] = i;                                                     \
> >> +       asm volatile ("" ::: "memory");                                 \
> >> +      }                                                                        \
> >> +    feclearexcept (FE_ALL_EXCEPT);                                     \
> >> +    test_##TYPE1##_##TYPE2##_##CMP##_var (dest, src, 11, a, b, N);     \
> >> +    if (TEST_EXCEPTIONS                                                        \
> >> +       && !fetestexcept (FE_INVALID) != !(EXPECT_INVALID))             \
> >> +      __builtin_abort ();                                              \
> >> +    for (int i = 0; i < N; ++i)                                                \
> >> +      if (dest[i] != (CMP (a[i], b[i]) ? src[i] : 11))                 \
> >> +       __builtin_abort ();                                             \
> >> +  }
> >> +
> >> +#define RUN_CMP(CMP, EXPECT_INVALID) \
> >> +  RUN_LOOP (int32_t, float, CMP, EXPECT_INVALID) \
> >> +  RUN_LOOP (uint32_t, float, CMP, EXPECT_INVALID) \
> >> +  RUN_LOOP (float, float, CMP, EXPECT_INVALID) \
> >> +  RUN_LOOP (int64_t, double, CMP, EXPECT_INVALID) \
> >> +  RUN_LOOP (uint64_t, double, CMP, EXPECT_INVALID) \
> >> +  RUN_LOOP (double, double, CMP, EXPECT_INVALID)
> >> +
> >> +int __attribute__ ((optimize (1)))
> >> +main (void)
> >> +{
> >> +  RUN_CMP (eq, 0)
> >> +  RUN_CMP (ne, 0)
> >> +  RUN_CMP (olt, 1)
> >> +  RUN_CMP (ole, 1)
> >> +  RUN_CMP (oge, 1)
> >> +  RUN_CMP (ogt, 1)
> >> +  RUN_CMP (ordered, 0)
> >> +  RUN_CMP (unordered, 0)
> >> +  RUN_CMP (ueq, 0)
> >> +  RUN_CMP (ult, 0)
> >> +  RUN_CMP (ule, 0)
> >> +  RUN_CMP (uge, 0)
> >> +  RUN_CMP (ugt, 0)
> >> +  RUN_CMP (nueq, 0)
> >> +  RUN_CMP (nult, 0)
> >> +  RUN_CMP (nule, 0)
> >> +  RUN_CMP (nuge, 0)
> >> +  RUN_CMP (nugt, 0)
> >> +  return 0;
> >> +}
> >> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-3.c
> >> new file mode 100644
> >> index 00000000000..e50d561bd98
> >> --- /dev/null
> >> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-3.c
> >> @@ -0,0 +1,6 @@
> >> +/* { dg-do run { target { riscv_vector } } } */
> >> +/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-trapping-math" } */
> >> +/* { dg-require-effective-target fenv_exceptions } */
> >> +
> >> +#define TEST_EXCEPTIONS 0
> >> +#include "vcond_run-2.c"
> >> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
> >> index bc99cc0c3cf..9809a421fc8 100644
> >> --- a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
> >> +++ b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
> >> @@ -63,6 +63,8 @@ foreach op $AUTOVEC_TEST_OPTS {
> >>      "" "$op"
> >>    dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/binop/*.\[cS\]]] \
> >>      "" "$op"
> >> +  dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/cmp/*.\[cS\]]] \
> >> +    "" "$op"
> >>  }
> >>
> >>  # VLS-VLMAX tests
> >> --
> >> 2.36.3
> >>
diff mbox series

Patch

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 04b4459222a..e0258e8b798 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -162,3 +162,115 @@ 
 				 riscv_vector::RVV_BINOP, operands);
   DONE;
 })
+
+;; =========================================================================
+;; == Comparisons and selects
+;; =========================================================================
+
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Select based on masks
+;; -------------------------------------------------------------------------
+;; Includes merging patterns for:
+;; - vmerge.vv
+;; - vmerge.vx
+;; - vfmerge.vf
+;; -------------------------------------------------------------------------
+
+(define_expand "@vcond_mask_<mode><vm>"
+  [(match_operand:V 0 "register_operand")
+   (match_operand:<VM> 3 "register_operand")
+   (match_operand:V 1 "nonmemory_operand")
+   (match_operand:V 2 "register_operand")]
+  "TARGET_VECTOR"
+  {
+    /* The order of vcond_mask is opposite to pred_merge.  */
+    std::swap (operands[1], operands[2]);
+    riscv_vector::emit_vlmax_merge_insn (code_for_pred_merge (<MODE>mode),
+    			riscv_vector::RVV_MERGE_OP, operands);
+    DONE;
+  }
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Comparisons
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - vms<eq/ne/ltu/lt/leu/le/gtu/gt>.<vv/vx/vi>
+;; -------------------------------------------------------------------------
+
+(define_expand "vec_cmp<mode><vm>"
+  [(set (match_operand:<VM> 0 "register_operand")
+	(match_operator:<VM> 1 "comparison_operator"
+	  [(match_operand:VI 2 "register_operand")
+	   (match_operand:VI 3 "register_operand")]))]
+  "TARGET_VECTOR"
+  {
+    riscv_vector::expand_vec_cmp (operands[0], GET_CODE (operands[1]),
+				  operands[2], operands[3]);
+    DONE;
+  }
+)
+
+(define_expand "vec_cmpu<mode><vm>"
+  [(set (match_operand:<VM> 0 "register_operand")
+	(match_operator:<VM> 1 "comparison_operator"
+	  [(match_operand:VI 2 "register_operand")
+	   (match_operand:VI 3 "register_operand")]))]
+  "TARGET_VECTOR"
+  {
+    riscv_vector::expand_vec_cmp (operands[0], GET_CODE (operands[1]),
+				  operands[2], operands[3]);
+    DONE;
+  }
+)
+
+(define_expand "vec_cmp<mode><vm>"
+  [(set (match_operand:<VM> 0 "register_operand")
+	(match_operator:<VM> 1 "comparison_operator"
+	  [(match_operand:VF 2 "register_operand")
+	   (match_operand:VF 3 "register_operand")]))]
+  "TARGET_VECTOR"
+  {
+    riscv_vector::expand_vec_cmp_float (operands[0], GET_CODE (operands[1]),
+				        operands[2], operands[3], false);
+    DONE;
+  }
+)
+
+;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Compare and select
+;; -------------------------------------------------------------------------
+;; The patterns in this section are synthetic.
+;; -------------------------------------------------------------------------
+
+(define_expand "vcond<V:mode><VI:mode>"
+  [(set (match_operand:V 0 "register_operand")
+	(if_then_else:V
+	  (match_operator 3 "comparison_operator"
+	    [(match_operand:VI 4 "register_operand")
+	     (match_operand:VI 5 "register_operand")])
+	  (match_operand:V 1 "register_operand")
+	  (match_operand:V 2 "register_operand")))]
+  "TARGET_VECTOR && known_eq (GET_MODE_NUNITS (<V:MODE>mode),
+  		GET_MODE_NUNITS (<VI:MODE>mode))"
+  {
+    riscv_vector::expand_vcond (operands);
+    DONE;
+  }
+)
+
+(define_expand "vcondu<V:mode><VI:mode>"
+  [(set (match_operand:V 0 "register_operand")
+	(if_then_else:V
+	  (match_operator 3 "comparison_operator"
+	    [(match_operand:VI 4 "register_operand")
+	     (match_operand:VI 5 "register_operand")])
+	  (match_operand:V 1 "register_operand")
+	  (match_operand:V 2 "register_operand")))]
+  "TARGET_VECTOR && known_eq (GET_MODE_NUNITS (<V:MODE>mode),
+  		GET_MODE_NUNITS (<VI:MODE>mode))"
+  {
+    riscv_vector::expand_vcond (operands);
+    DONE;
+  }
+)
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 0ae4656befb..58e55c234cb 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -137,6 +137,8 @@  enum insn_type
   RVV_MISC_OP = 1,
   RVV_UNOP = 2,
   RVV_BINOP = 3,
+  RVV_MERGE_OP = 4,
+  RVV_CMP_OP = 4,
 };
 enum vlmul_type
 {
@@ -174,6 +176,8 @@  void emit_vlmax_vsetvl (machine_mode, rtx);
 void emit_hard_vlmax_vsetvl (machine_mode, rtx);
 void emit_vlmax_insn (unsigned, int, rtx *);
 void emit_nonvlmax_insn (unsigned, int, rtx *);
+void emit_vlmax_merge_insn (unsigned, int, rtx *);
+void emit_vlmax_cmp_insn (unsigned, int, rtx *);
 enum vlmul_type get_vlmul (machine_mode);
 unsigned int get_ratio (machine_mode);
 unsigned int get_nf (machine_mode);
@@ -204,6 +208,8 @@  bool simm5_p (rtx);
 bool neg_simm5_p (rtx);
 #ifdef RTX_CODE
 bool has_vi_variant_p (rtx_code, rtx);
+void expand_vec_cmp (rtx, rtx_code, rtx, rtx);
+bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
 #endif
 bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
 			  bool, void (*)(rtx *, rtx));
@@ -226,6 +232,7 @@  machine_mode preferred_simd_mode (scalar_mode);
 opt_machine_mode get_mask_mode (machine_mode);
 void expand_vec_series (rtx, rtx, rtx);
 void expand_vec_init (rtx, rtx);
+void expand_vcond (rtx *);
 /* Rounding mode bitfield for fixed point VXRM.  */
 enum vxrm_field_enum
 {
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 478a052a779..e7c0ec226cf 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -169,7 +169,7 @@  public:
 
     if (m_needs_avl_p)
       {
-	rtx len = ops[m_op_num];
+	rtx len;
 	if (m_vlmax_p)
 	  {
 	    if (const_vlmax_p (m_dest_mode))
@@ -185,6 +185,16 @@  public:
 		len = gen_reg_rtx (Pmode);
 		emit_vlmax_vsetvl (m_dest_mode, len);
 	      }
+	    else
+	      {
+		gcc_assert (ops[m_op_num]);
+		len = ops[m_op_num];
+	      }
+	  }
+	else
+	  {
+	    gcc_assert (ops[m_op_num]);
+	    len = ops[m_op_num];
 	  }
 	add_input_operand (len, Pmode);
       }
@@ -341,15 +351,15 @@  autovec_use_vlmax_p (void)
 void
 emit_vlmax_insn (unsigned icode, int op_num, rtx *ops)
 {
-  machine_mode data_mode = GET_MODE (ops[0]);
-  machine_mode mask_mode = get_mask_mode (data_mode).require ();
+  machine_mode dest_mode = GET_MODE (ops[0]);
+  machine_mode mask_mode = get_mask_mode (dest_mode).require ();
   /* We have a maximum of 11 operands for RVV instruction patterns according to
    * vector.md.  */
   insn_expander<11> e (/*OP_NUM*/ op_num, /*HAS_DEST_P*/ true,
 		       /*FULLY_UNMASKED_P*/ true,
 		       /*USE_REAL_MERGE_P*/ false, /*HAS_AVL_P*/ true,
 		       /*VLMAX_P*/ true,
-		       /*DEST_MODE*/ data_mode, /*MASK_MODE*/ mask_mode);
+		       /*DEST_MODE*/ dest_mode, /*MASK_MODE*/ mask_mode);
   e.set_policy (TAIL_ANY);
   e.set_policy (MASK_ANY);
   e.emit_insn ((enum insn_code) icode, ops);
@@ -360,20 +370,52 @@  emit_vlmax_insn (unsigned icode, int op_num, rtx *ops)
 void
 emit_nonvlmax_insn (unsigned icode, int op_num, rtx *ops)
 {
-  machine_mode data_mode = GET_MODE (ops[0]);
-  machine_mode mask_mode = get_mask_mode (data_mode).require ();
+  machine_mode dest_mode = GET_MODE (ops[0]);
+  machine_mode mask_mode = get_mask_mode (dest_mode).require ();
   /* We have a maximum of 11 operands for RVV instruction patterns according to
    * vector.md.  */
   insn_expander<11> e (/*OP_NUM*/ op_num, /*HAS_DEST_P*/ true,
 		       /*FULLY_UNMASKED_P*/ true,
 		       /*USE_REAL_MERGE_P*/ false, /*HAS_AVL_P*/ true,
 		       /*VLMAX_P*/ false,
-		       /*DEST_MODE*/ data_mode, /*MASK_MODE*/ mask_mode);
+		       /*DEST_MODE*/ dest_mode, /*MASK_MODE*/ mask_mode);
   e.set_policy (TAIL_ANY);
   e.set_policy (MASK_ANY);
   e.emit_insn ((enum insn_code) icode, ops);
 }
 
+/* This function emits a merge instruction.  */
+void
+emit_vlmax_merge_insn (unsigned icode, int op_num, rtx *ops)
+{
+  machine_mode dest_mode = GET_MODE (ops[0]);
+  machine_mode mask_mode = get_mask_mode (dest_mode).require ();
+  insn_expander<11> e (/*OP_NUM*/ op_num, /*HAS_DEST_P*/ true,
+		       /*FULLY_UNMASKED_P*/ false,
+		       /*USE_REAL_MERGE_P*/ false, /*HAS_AVL_P*/ true,
+		       /*VLMAX_P*/ true,
+		       dest_mode, mask_mode);
+  e.set_policy (TAIL_ANY);
+  e.emit_insn ((enum insn_code) icode, ops);
+}
+
+/* This function emits a vector comparison (cmp) instruction.  */
+void
+emit_vlmax_cmp_insn (unsigned icode, int op_num, rtx *ops)
+{
+  machine_mode mode = GET_MODE (ops[0]);
+  bool fully_unmasked_p = op_num == RVV_CMP_OP ? true : false;
+  bool use_real_merge_p = op_num == RVV_CMP_OP ? false : true;
+  insn_expander<11> e (/*OP_NUM*/ op_num, /*HAS_DEST_P*/ true,
+		       /*FULLY_UNMASKED_P*/ fully_unmasked_p,
+		       /*USE_REAL_MERGE_P*/ use_real_merge_p,
+		       /*HAS_AVL_P*/ true,
+		       /*VLMAX_P*/ true,
+		       /*DEST_MODE*/ mode, /*MASK_MODE*/ mode);
+  e.set_policy (op_num == RVV_CMP_OP ? MASK_UNDISTURBED : MASK_ANY);
+  e.emit_insn ((enum insn_code) icode, ops);
+}
+
 /* Expand series const vector.  */
 
 void
@@ -1318,4 +1360,214 @@  expand_vec_init (rtx target, rtx vals)
   expand_vector_init_insert_elems (target, v, nelts);
 }
 
+/* Get insn code for corresponding comparison.  */
+
+static insn_code
+get_cmp_insn_code (rtx_code code, machine_mode mode)
+{
+  insn_code icode;
+  switch (code)
+    {
+    case EQ:
+    case NE:
+    case LE:
+    case LEU:
+    case GT:
+    case GTU:
+    case LTGT:
+      icode = code_for_pred_cmp (mode);
+      break;
+    case LT:
+    case LTU:
+    case GE:
+    case GEU:
+      if (FLOAT_MODE_P (mode))
+	icode = code_for_pred_cmp (mode);
+      else
+	icode = code_for_pred_ltge (mode);
+      break;
+    default:
+      gcc_unreachable ();
+    }
+  return icode;
+}
+
+/* Expand an RVV comparison.  */
+
+void
+expand_vec_cmp (rtx target, rtx_code code, rtx op0, rtx op1)
+{
+  machine_mode mask_mode = GET_MODE (target);
+  machine_mode data_mode = GET_MODE (op0);
+  insn_code icode = get_cmp_insn_code (code, data_mode);
+
+  if (code == LTGT)
+    {
+      rtx lt = gen_reg_rtx (mask_mode);
+      rtx gt = gen_reg_rtx (mask_mode);
+      expand_vec_cmp (lt, LT, op0, op1);
+      expand_vec_cmp (gt, GT, op0, op1);
+      icode = code_for_pred (IOR, mask_mode);
+      rtx ops[3] = {target, lt, gt};
+      emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, ops);
+      return;
+    }
+
+  rtx cmp = gen_rtx_fmt_ee (code, mask_mode, op0, op1);
+  rtx ops[RVV_CMP_OP] = {target, cmp, op0, op1};
+  emit_vlmax_cmp_insn (icode, RVV_CMP_OP, ops);
+}
+
+void
+expand_vec_cmp (rtx target, rtx_code code, rtx mask, rtx maskoff, rtx op0,
+		rtx op1)
+{
+  machine_mode mask_mode = GET_MODE (target);
+  machine_mode data_mode = GET_MODE (op0);
+  insn_code icode = get_cmp_insn_code (code, data_mode);
+
+  if (code == LTGT)
+    {
+      rtx lt = gen_reg_rtx (mask_mode);
+      rtx gt = gen_reg_rtx (mask_mode);
+      expand_vec_cmp (lt, LT, mask, maskoff, op0, op1);
+      expand_vec_cmp (gt, GT, mask, maskoff, op0, op1);
+      icode = code_for_pred (IOR, mask_mode);
+      rtx ops[RVV_BINOP] = {target, lt, gt};
+      emit_vlmax_insn (icode, RVV_BINOP, ops);
+      return;
+    }
+
+  rtx cmp = gen_rtx_fmt_ee (code, mask_mode, op0, op1);
+  rtx ops[RVV_CMP_OP + 2] = {target, mask, maskoff, cmp, op0, op1};
+  emit_vlmax_cmp_insn (icode, RVV_CMP_OP + 2, ops);
+}
+
+/* Expand an RVV floating-point comparison:
+
+   If CAN_INVERT_P is true, the caller can also handle inverted results;
+   return true if the result is in fact inverted.  */
+
+bool
+expand_vec_cmp_float (rtx target, rtx_code code, rtx op0, rtx op1,
+		      bool can_invert_p)
+{
+  machine_mode mask_mode = GET_MODE (target);
+  machine_mode data_mode = GET_MODE (op0);
+
+  /* If can_invert_p = true:
+     It suffices to implement a u>= b as !(a < b) but with the NaNs masked off:
+
+       vmfeq.vv    v0, va, va
+       vmfeq.vv    v1, vb, vb
+       vmand.mm    v0, v0, v1
+       vmflt.vv    v0, va, vb, v0.t
+       vmnot.m     v0, v0
+
+     And, if !HONOR_SNANS, then you can remove the vmand.mm by masking the
+     second vmfeq.vv:
+
+       vmfeq.vv    v0, va, va
+       vmfeq.vv    v0, vb, vb, v0.t
+       vmflt.vv    v0, va, vb, v0.t
+       vmnot.m     v0, v0
+
+     If can_invert_p = false:
+
+       # Example of implementing isgreater()
+       vmfeq.vv v0, va, va        # Only set where A is not NaN.
+       vmfeq.vv v1, vb, vb        # Only set where B is not NaN.
+       vmand.mm v0, v0, v1        # Only set where A and B are ordered,
+       vmfgt.vv v0, va, vb, v0.t  #  so only set flags on ordered values.
+  */
+
+  rtx eq0 = gen_reg_rtx (mask_mode);
+  rtx eq1 = gen_reg_rtx (mask_mode);
+  switch (code)
+    {
+    case EQ:
+    case NE:
+    case LT:
+    case LE:
+    case GT:
+    case GE:
+    case LTGT:
+      /* There is native support for the comparison.  */
+      expand_vec_cmp (target, code, op0, op1);
+      return false;
+    case UNEQ:
+    case ORDERED:
+    case UNORDERED:
+    case UNLT:
+    case UNLE:
+    case UNGT:
+    case UNGE:
+      /* vmfeq.vv v0, va, va  */
+      expand_vec_cmp (eq0, EQ, op0, op0);
+      if (HONOR_SNANS (data_mode))
+	{
+	  /*
+	     vmfeq.vv    v1, vb, vb
+	     vmand.mm    v0, v0, v1
+	  */
+	  expand_vec_cmp (eq1, EQ, op1, op1);
+	  insn_code icode = code_for_pred (AND, mask_mode);
+	  rtx ops[3] = {eq0, eq0, eq1};
+	  emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, ops);
+	}
+      else
+	{
+	  /* vmfeq.vv    v0, vb, vb, v0.t  */
+	  expand_vec_cmp (eq0, EQ, eq0, eq0, op1, op1);
+	}
+      break;
+    default:
+      gcc_unreachable ();
+    }
+
+  if (code == ORDERED)
+    {
+      emit_move_insn (target, eq0);
+      return false;
+    }
+
+  /* There is native support for the inverse comparison.  */
+  code = reverse_condition_maybe_unordered (code);
+  if (code == ORDERED)
+    emit_move_insn (target, eq0);
+  else
+    expand_vec_cmp (eq0, code, eq0, eq0, op0, op1);
+
+  if (can_invert_p)
+    {
+      emit_move_insn (target, eq0);
+      return true;
+    }
+  insn_code icode = code_for_pred_not (mask_mode);
+  rtx ops[RVV_UNOP] = {target, eq0};
+  emit_vlmax_insn (icode, RVV_UNOP, ops);
+  return false;
+}
+
+/* Expand an RVV vcond pattern with operands OPS: OPS[0] is the
+   destination, OPS[1]/OPS[2] are the values to select between, and
+   OPS[3]..OPS[5] give the comparison code and its two operands.  */
+
+void
+expand_vcond (rtx *ops)
+{
+  machine_mode cmp_mode = GET_MODE (ops[4]);
+  machine_mode data_mode = GET_MODE (ops[1]);
+  machine_mode mask_mode = get_mask_mode (cmp_mode).require ();
+  rtx mask = gen_reg_rtx (mask_mode);
+  if (FLOAT_MODE_P (cmp_mode))
+    {
+      if (expand_vec_cmp_float (mask, GET_CODE (ops[3]), ops[4], ops[5], true))
+	std::swap (ops[1], ops[2]);
+    }
+  else
+    expand_vec_cmp (mask, GET_CODE (ops[3]), ops[4], ops[5]);
+  emit_insn (gen_vcond_mask (data_mode, data_mode, ops[0], ops[1], ops[2], mask));
+}
+
 } // namespace riscv_vector
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
new file mode 100644
index 00000000000..c882654cb49
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-1.c
@@ -0,0 +1,157 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+
+#include <stdint-gcc.h>
+
+#define DEF_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX)	\
+  void __attribute__ ((noinline, noclone))			\
+  vcond_var_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r,	\
+				   DATA_TYPE *__restrict__ x,	\
+				   DATA_TYPE *__restrict__ y,	\
+				   CMP_TYPE *__restrict__ a,	\
+				   CMP_TYPE *__restrict__ b,	\
+				   int n)			\
+  {								\
+    for (int i = 0; i < n; i++)					\
+      {								\
+	DATA_TYPE xval = x[i], yval = y[i];			\
+	CMP_TYPE aval = a[i], bval = b[i];			\
+	r[i] = aval COND bval ? xval : yval;			\
+      }								\
+  }
+
+#define DEF_VCOND_IMM(DATA_TYPE, CMP_TYPE, COND, IMM, SUFFIX)	\
+  void __attribute__ ((noinline, noclone))			\
+  vcond_imm_##CMP_TYPE##_##SUFFIX (DATA_TYPE *__restrict__ r,	\
+				   DATA_TYPE *__restrict__ x,	\
+				   DATA_TYPE *__restrict__ y,	\
+				   CMP_TYPE *__restrict__ a,	\
+				   int n)			\
+  {								\
+    for (int i = 0; i < n; i++)					\
+      {								\
+	DATA_TYPE xval = x[i], yval = y[i];			\
+	CMP_TYPE aval = a[i];					\
+	r[i] = aval COND (CMP_TYPE) IMM ? xval : yval;		\
+      }								\
+  }
+
+#define TEST_COND_VAR_SIGNED_ALL(T, COND, SUFFIX)	\
+  T (int8_t, int8_t, COND, SUFFIX)			\
+  T (int16_t, int16_t, COND, SUFFIX)			\
+  T (int32_t, int32_t, COND, SUFFIX)			\
+  T (int64_t, int64_t, COND, SUFFIX)			\
+  T (float, int32_t, COND, SUFFIX##_float)		\
+  T (double, int64_t, COND, SUFFIX##_double)
+
+#define TEST_COND_VAR_UNSIGNED_ALL(T, COND, SUFFIX)	\
+  T (uint8_t, uint8_t, COND, SUFFIX)			\
+  T (uint16_t, uint16_t, COND, SUFFIX)			\
+  T (uint32_t, uint32_t, COND, SUFFIX)			\
+  T (uint64_t, uint64_t, COND, SUFFIX)			\
+  T (float, uint32_t, COND, SUFFIX##_float)		\
+  T (double, uint64_t, COND, SUFFIX##_double)
+
+#define TEST_COND_VAR_ALL(T, COND, SUFFIX)	\
+  TEST_COND_VAR_SIGNED_ALL (T, COND, SUFFIX)	\
+  TEST_COND_VAR_UNSIGNED_ALL (T, COND, SUFFIX)
+
+#define TEST_VAR_ALL(T)				\
+  TEST_COND_VAR_ALL (T, >, _gt)			\
+  TEST_COND_VAR_ALL (T, <, _lt)			\
+  TEST_COND_VAR_ALL (T, >=, _ge)		\
+  TEST_COND_VAR_ALL (T, <=, _le)		\
+  TEST_COND_VAR_ALL (T, ==, _eq)		\
+  TEST_COND_VAR_ALL (T, !=, _ne)
+
+#define TEST_COND_IMM_SIGNED_ALL(T, COND, IMM, SUFFIX)	\
+  T (int8_t, int8_t, COND, IMM, SUFFIX)			\
+  T (int16_t, int16_t, COND, IMM, SUFFIX)		\
+  T (int32_t, int32_t, COND, IMM, SUFFIX)		\
+  T (int64_t, int64_t, COND, IMM, SUFFIX)		\
+  T (float, int32_t, COND, IMM, SUFFIX##_float)		\
+  T (double, int64_t, COND, IMM, SUFFIX##_double)
+
+#define TEST_COND_IMM_UNSIGNED_ALL(T, COND, IMM, SUFFIX)	\
+  T (uint8_t, uint8_t, COND, IMM, SUFFIX)			\
+  T (uint16_t, uint16_t, COND, IMM, SUFFIX)			\
+  T (uint32_t, uint32_t, COND, IMM, SUFFIX)			\
+  T (uint64_t, uint64_t, COND, IMM, SUFFIX)			\
+  T (float, uint32_t, COND, IMM, SUFFIX##_float)		\
+  T (double, uint64_t, COND, IMM, SUFFIX##_double)
+
+#define TEST_COND_IMM_ALL(T, COND, IMM, SUFFIX)		\
+  TEST_COND_IMM_SIGNED_ALL (T, COND, IMM, SUFFIX)	\
+  TEST_COND_IMM_UNSIGNED_ALL (T, COND, IMM, SUFFIX)
+
+#define TEST_IMM_ALL(T)							\
+  /* Expect immediates to make it into the encoding.  */		\
+  TEST_COND_IMM_ALL (T, >, 5, _gt)					\
+  TEST_COND_IMM_ALL (T, <, 5, _lt)					\
+  TEST_COND_IMM_ALL (T, >=, 5, _ge)					\
+  TEST_COND_IMM_ALL (T, <=, 5, _le)					\
+  TEST_COND_IMM_ALL (T, ==, 5, _eq)					\
+  TEST_COND_IMM_ALL (T, !=, 5, _ne)					\
+									\
+  TEST_COND_IMM_SIGNED_ALL (T, >, 15, _gt2)				\
+  TEST_COND_IMM_SIGNED_ALL (T, <, 15, _lt2)				\
+  TEST_COND_IMM_SIGNED_ALL (T, >=, 15, _ge2)				\
+  TEST_COND_IMM_SIGNED_ALL (T, <=, 15, _le2)				\
+  TEST_COND_IMM_ALL (T, ==, 15, _eq2)					\
+  TEST_COND_IMM_ALL (T, !=, 15, _ne2)					\
+									\
+  TEST_COND_IMM_SIGNED_ALL (T, >, 16, _gt3)				\
+  TEST_COND_IMM_SIGNED_ALL (T, <, 16, _lt3)				\
+  TEST_COND_IMM_SIGNED_ALL (T, >=, 16, _ge3)				\
+  TEST_COND_IMM_SIGNED_ALL (T, <=, 16, _le3)				\
+  TEST_COND_IMM_ALL (T, ==, 16, _eq3)					\
+  TEST_COND_IMM_ALL (T, !=, 16, _ne3)					\
+									\
+  TEST_COND_IMM_SIGNED_ALL (T, >, -16, _gt4)				\
+  TEST_COND_IMM_SIGNED_ALL (T, <, -16, _lt4)				\
+  TEST_COND_IMM_SIGNED_ALL (T, >=, -16, _ge4)				\
+  TEST_COND_IMM_SIGNED_ALL (T, <=, -16, _le4)				\
+  TEST_COND_IMM_ALL (T, ==, -16, _eq4)					\
+  TEST_COND_IMM_ALL (T, !=, -16, _ne4)					\
+									\
+  TEST_COND_IMM_SIGNED_ALL (T, >, -17, _gt5)				\
+  TEST_COND_IMM_SIGNED_ALL (T, <, -17, _lt5)				\
+  TEST_COND_IMM_SIGNED_ALL (T, >=, -17, _ge5)				\
+  TEST_COND_IMM_SIGNED_ALL (T, <=, -17, _le5)				\
+  TEST_COND_IMM_ALL (T, ==, -17, _eq5)					\
+  TEST_COND_IMM_ALL (T, !=, -17, _ne5)					\
+									\
+  TEST_COND_IMM_UNSIGNED_ALL (T, >, 0, _gt6)				\
+  /* Testing if an unsigned value >= 0 or < 0 is pointless as it will	\
+     get folded away by the compiler.  */				\
+  TEST_COND_IMM_UNSIGNED_ALL (T, <=, 0, _le6)				\
+									\
+  TEST_COND_IMM_UNSIGNED_ALL (T, >, 127, _gt7)				\
+  TEST_COND_IMM_UNSIGNED_ALL (T, <, 127, _lt7)				\
+  TEST_COND_IMM_UNSIGNED_ALL (T, >=, 127, _ge7)				\
+  TEST_COND_IMM_UNSIGNED_ALL (T, <=, 127, _le7)				\
+									\
+  /* Expect immediates to NOT make it into the encoding, and instead be \
+     forced into a register.  */					\
+  TEST_COND_IMM_UNSIGNED_ALL (T, >, 128, _gt8)				\
+  TEST_COND_IMM_UNSIGNED_ALL (T, <, 128, _lt8)				\
+  TEST_COND_IMM_UNSIGNED_ALL (T, >=, 128, _ge8)				\
+  TEST_COND_IMM_UNSIGNED_ALL (T, <=, 128, _le8)
+
+TEST_VAR_ALL (DEF_VCOND_VAR)
+TEST_IMM_ALL (DEF_VCOND_IMM)
+
+/* { dg-final { scan-assembler-times {\tvmseq\.vi} 42 } } */
+/* { dg-final { scan-assembler-times {\tvmsne\.vi} 42 } } */
+/* { dg-final { scan-assembler-times {\tvmsgt\.vi} 30 } } */
+/* { dg-final { scan-assembler-times {\tvmsgtu\.vi} 12 } } */
+/* { dg-final { scan-assembler-times {\tvmslt\.vi} 8 } } */
+/* { dg-final { scan-assembler-times {\tvmsge\.vi} 8 } } */
+/* { dg-final { scan-assembler-times {\tvmsle\.vi} 30 } } */
+/* { dg-final { scan-assembler-times {\tvmsleu\.vi} 12 } } */
+/* { dg-final { scan-assembler-times {\tvmseq} 78 } } */
+/* { dg-final { scan-assembler-times {\tvmsne} 78 } } */
+/* { dg-final { scan-assembler-times {\tvmsgt} 82 } } */
+/* { dg-final { scan-assembler-times {\tvmslt} 38 } } */
+/* { dg-final { scan-assembler-times {\tvmsge} 38 } } */
+/* { dg-final { scan-assembler-times {\tvmsle} 82 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-2.c
new file mode 100644
index 00000000000..738f978c5a1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-2.c
@@ -0,0 +1,75 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable" } */
+
+#include <stdint-gcc.h>
+
+#define eq(A, B) ((A) == (B))
+#define ne(A, B) ((A) != (B))
+#define olt(A, B) ((A) < (B))
+#define ole(A, B) ((A) <= (B))
+#define oge(A, B) ((A) >= (B))
+#define ogt(A, B) ((A) > (B))
+#define ordered(A, B) (!__builtin_isunordered (A, B))
+#define unordered(A, B) (__builtin_isunordered (A, B))
+#define ueq(A, B) (!__builtin_islessgreater (A, B))
+#define ult(A, B) (__builtin_isless (A, B))
+#define ule(A, B) (__builtin_islessequal (A, B))
+#define uge(A, B) (__builtin_isgreaterequal (A, B))
+#define ugt(A, B) (__builtin_isgreater (A, B))
+#define nueq(A, B) (__builtin_islessgreater (A, B))
+#define nult(A, B) (!__builtin_isless (A, B))
+#define nule(A, B) (!__builtin_islessequal (A, B))
+#define nuge(A, B) (!__builtin_isgreaterequal (A, B))
+#define nugt(A, B) (!__builtin_isgreater (A, B))
+
+#define TEST_LOOP(TYPE1, TYPE2, CMP)				\
+  void __attribute__ ((noinline, noclone))			\
+  test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest,	\
+					TYPE1 *restrict src,	\
+					TYPE1 fallback,		\
+					TYPE2 *restrict a,	\
+					TYPE2 *restrict b,	\
+					int count)		\
+  {								\
+    for (int i = 0; i < count; ++i)				\
+      {\
+        TYPE2 aval = a[i]; \
+        TYPE2 bval = b[i]; \
+        TYPE1 srcval = src[i]; \
+        dest[i] = CMP (aval, bval) ? srcval : fallback;		\
+      }\
+  }
+
+#define TEST_CMP(CMP) \
+  TEST_LOOP (int32_t, float, CMP) \
+  TEST_LOOP (uint32_t, float, CMP) \
+  TEST_LOOP (float, float, CMP) \
+  TEST_LOOP (int64_t, double, CMP) \
+  TEST_LOOP (uint64_t, double, CMP) \
+  TEST_LOOP (double, double, CMP)
+
+TEST_CMP (eq)
+TEST_CMP (ne)
+TEST_CMP (olt)
+TEST_CMP (ole)
+TEST_CMP (oge)
+TEST_CMP (ogt)
+TEST_CMP (ordered)
+TEST_CMP (unordered)
+TEST_CMP (ueq)
+TEST_CMP (ult)
+TEST_CMP (ule)
+TEST_CMP (uge)
+TEST_CMP (ugt)
+TEST_CMP (nueq)
+TEST_CMP (nult)
+TEST_CMP (nule)
+TEST_CMP (nuge)
+TEST_CMP (nugt)
+
+/* { dg-final { scan-assembler-times {\tvmfeq} 150 } } */
+/* { dg-final { scan-assembler-times {\tvmfne} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfgt} 30 } } */
+/* { dg-final { scan-assembler-times {\tvmflt} 30 } } */
+/* { dg-final { scan-assembler-times {\tvmfge} 18 } } */
+/* { dg-final { scan-assembler-times {\tvmfle} 18 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-3.c
new file mode 100644
index 00000000000..53384829e64
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond-3.c
@@ -0,0 +1,13 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-trapping-math" } */
+
+/* The difference here is that nueq can use LTGT.  */
+
+#include "vcond-2.c"
+
+/* { dg-final { scan-assembler-times {\tvmfeq} 90 } } */
+/* { dg-final { scan-assembler-times {\tvmfne} 6 } } */
+/* { dg-final { scan-assembler-times {\tvmfgt} 30 } } */
+/* { dg-final { scan-assembler-times {\tvmflt} 30 } } */
+/* { dg-final { scan-assembler-times {\tvmfge} 18 } } */
+/* { dg-final { scan-assembler-times {\tvmfle} 18 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-1.c
new file mode 100644
index 00000000000..a84d22d2a73
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-1.c
@@ -0,0 +1,49 @@ 
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+
+#include "vcond-1.c"
+
+#define N 97
+
+#define TEST_VCOND_VAR(DATA_TYPE, CMP_TYPE, COND, SUFFIX)	\
+{								\
+  DATA_TYPE x[N], y[N], r[N];					\
+  CMP_TYPE a[N], b[N];						\
+  for (int i = 0; i < N; ++i)					\
+    {								\
+      x[i] = i;							\
+      y[i] = (i & 1) + 5;					\
+      a[i] = i - N / 3;						\
+      b[i] = N - N / 3 - i;					\
+      asm volatile ("" ::: "memory");				\
+    }								\
+  vcond_var_##CMP_TYPE##_##SUFFIX (r, x, y, a, b, N);		\
+  for (int i = 0; i < N; ++i)					\
+    if (r[i] != (a[i] COND b[i] ? x[i] : y[i]))			\
+      __builtin_abort ();					\
+}
+
+#define TEST_VCOND_IMM(DATA_TYPE, CMP_TYPE, COND, IMM, SUFFIX)	\
+{								\
+  DATA_TYPE x[N], y[N], r[N];					\
+  CMP_TYPE a[N];						\
+  for (int i = 0; i < N; ++i)					\
+    {								\
+      x[i] = i;							\
+      y[i] = (i & 1) + 5;					\
+      a[i] = IMM - N / 3 + i;					\
+      asm volatile ("" ::: "memory");				\
+    }								\
+  vcond_imm_##CMP_TYPE##_##SUFFIX (r, x, y, a, N);		\
+  for (int i = 0; i < N; ++i)					\
+    if (r[i] != (a[i] COND (CMP_TYPE) IMM ? x[i] : y[i]))	\
+      __builtin_abort ();					\
+}
+
+int __attribute__ ((optimize (1)))
+main (int argc, char **argv)
+{
+  TEST_VAR_ALL (TEST_VCOND_VAR)
+  TEST_IMM_ALL (TEST_VCOND_IMM)
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-2.c
new file mode 100644
index 00000000000..56fd39f4691
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-2.c
@@ -0,0 +1,76 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable" } */
+/* { dg-require-effective-target fenv_exceptions } */
+
+#include "vcond-2.c"
+
+#ifndef TEST_EXCEPTIONS
+#define TEST_EXCEPTIONS 1
+#endif
+
+#include <fenv.h>
+
+#define N 401
+
+#define RUN_LOOP(TYPE1, TYPE2, CMP, EXPECT_INVALID)			\
+  {									\
+    TYPE1 dest[N], src[N];						\
+    TYPE2 a[N], b[N];							\
+    for (int i = 0; i < N; ++i)						\
+      {									\
+	src[i] = i * i;							\
+	if (i % 5 == 0)							\
+	  a[i] = 0;							\
+	else if (i % 3)							\
+	  a[i] = i * 0.1;						\
+	else								\
+	  a[i] = i;							\
+	if (i % 7 == 0)							\
+	  b[i] = __builtin_nan ("");					\
+	else if (i % 6)							\
+	  b[i] = i * 0.1;						\
+	else								\
+	  b[i] = i;							\
+	asm volatile ("" ::: "memory");					\
+      }									\
+    feclearexcept (FE_ALL_EXCEPT);					\
+    test_##TYPE1##_##TYPE2##_##CMP##_var (dest, src, 11, a, b, N);	\
+    if (TEST_EXCEPTIONS							\
+	&& !fetestexcept (FE_INVALID) != !(EXPECT_INVALID))		\
+      __builtin_abort ();						\
+    for (int i = 0; i < N; ++i)						\
+      if (dest[i] != (CMP (a[i], b[i]) ? src[i] : 11))			\
+	__builtin_abort ();						\
+  }
+
+#define RUN_CMP(CMP, EXPECT_INVALID) \
+  RUN_LOOP (int32_t, float, CMP, EXPECT_INVALID) \
+  RUN_LOOP (uint32_t, float, CMP, EXPECT_INVALID) \
+  RUN_LOOP (float, float, CMP, EXPECT_INVALID) \
+  RUN_LOOP (int64_t, double, CMP, EXPECT_INVALID) \
+  RUN_LOOP (uint64_t, double, CMP, EXPECT_INVALID) \
+  RUN_LOOP (double, double, CMP, EXPECT_INVALID)
+
+int __attribute__ ((optimize (1)))
+main (void)
+{
+  RUN_CMP (eq, 0)
+  RUN_CMP (ne, 0)
+  RUN_CMP (olt, 1)
+  RUN_CMP (ole, 1)
+  RUN_CMP (oge, 1)
+  RUN_CMP (ogt, 1)
+  RUN_CMP (ordered, 0)
+  RUN_CMP (unordered, 0)
+  RUN_CMP (ueq, 0)
+  RUN_CMP (ult, 0)
+  RUN_CMP (ule, 0)
+  RUN_CMP (uge, 0)
+  RUN_CMP (ugt, 0)
+  RUN_CMP (nueq, 0)
+  RUN_CMP (nult, 0)
+  RUN_CMP (nule, 0)
+  RUN_CMP (nuge, 0)
+  RUN_CMP (nugt, 0)
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-3.c
new file mode 100644
index 00000000000..e50d561bd98
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/cmp/vcond_run-3.c
@@ -0,0 +1,6 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-trapping-math" } */
+/* { dg-require-effective-target fenv_exceptions } */
+
+#define TEST_EXCEPTIONS 0
+#include "vcond_run-2.c"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
index bc99cc0c3cf..9809a421fc8 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
+++ b/gcc/testsuite/gcc.target/riscv/rvv/rvv.exp
@@ -63,6 +63,8 @@  foreach op $AUTOVEC_TEST_OPTS {
     "" "$op"
   dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/binop/*.\[cS\]]] \
     "" "$op"
+  dg-runtest [lsort [glob -nocomplain $srcdir/$subdir/autovec/cmp/*.\[cS\]]] \
+    "" "$op"
 }
 
 # VLS-VLMAX tests