@@ -1208,3 +1208,70 @@
riscv_vector::emit_vlmax_insn (icode, riscv_vector::RVV_BINOP, operands);
DONE;
})
+
+;; -------------------------------------------------------------------------
+;; ---- [INT] Conditional binary operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - vadd.vv/vsub.vv/...
+;; - vadd.vi/vsub.vi/...
+;; -------------------------------------------------------------------------
+
+(define_expand "cond_len_<optab><mode>"
+ [(match_operand:VI 0 "register_operand")  ; destination
+ (match_operand:<VM> 1 "vector_mask_operand")  ; mask
+ (any_int_binop_no_shift:VI
+ (match_operand:VI 2 "<binop_rhs1_predicate>")  ; first source
+ (match_operand:VI 3 "<binop_rhs2_predicate>"))  ; second source
+ (match_operand:VI 4 "register_operand")  ; merge value
+ (match_operand 5 "autovec_length_operand")  ; length
+ (match_operand 6 "const_0_operand")]  ; not read by expand_cond_len_binop
+ "TARGET_VECTOR"
+{
+ riscv_vector::expand_cond_len_binop (<CODE>, operands);
+ DONE;
+})
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Conditional binary operations
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - vfadd.vv/vfsub.vv/...
+;; - vfadd.vf/vfsub.vf/...
+;; -------------------------------------------------------------------------
+
+(define_expand "cond_len_<optab><mode>"
+ [(match_operand:VF 0 "register_operand")  ; destination
+ (match_operand:<VM> 1 "vector_mask_operand")  ; mask
+ (any_float_binop:VF
+ (match_operand:VF 2 "register_operand")  ; first source
+ (match_operand:VF 3 "register_operand"))  ; second source
+ (match_operand:VF 4 "register_operand")  ; merge value
+ (match_operand 5 "autovec_length_operand")  ; length
+ (match_operand 6 "const_0_operand")]  ; not read by expand_cond_len_binop
+ "TARGET_VECTOR"
+{
+ riscv_vector::expand_cond_len_binop (<CODE>, operands);
+ DONE;
+})
+
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - vfmin.vv/vfmax.vv
+;; - vfmin.vf/vfmax.vf
+;; -------------------------------------------------------------------------
+
+(define_expand "cond_len_<optab><mode>"
+ [(match_operand:VF 0 "register_operand")  ; destination
+ (match_operand:<VM> 1 "vector_mask_operand")  ; mask
+ (any_float_binop_nofrm:VF
+ (match_operand:VF 2 "register_operand")  ; first source
+ (match_operand:VF 3 "register_operand"))  ; second source
+ (match_operand:VF 4 "register_operand")  ; merge value
+ (match_operand 5 "autovec_length_operand")  ; length
+ (match_operand 6 "const_0_operand")]  ; not read by expand_cond_len_binop
+ "TARGET_VECTOR"
+{
+ riscv_vector::expand_cond_len_binop (<CODE>, operands);
+ DONE;
+})
@@ -184,6 +184,7 @@ enum insn_type
RVV_UNOP = 2,
RVV_BINOP = 3,
RVV_BINOP_MU = RVV_BINOP + 2,
+ RVV_BINOP_TU = RVV_BINOP + 2,
RVV_MERGE_OP = 4,
RVV_CMP_OP = 4,
RVV_CMP_MU_OP = RVV_CMP_OP + 2, /* +2 means mask and maskoff operand. */
@@ -276,6 +277,7 @@ bool neg_simm5_p (rtx);
bool has_vi_variant_p (rtx_code, rtx);
void expand_vec_cmp (rtx, rtx_code, rtx, rtx);
bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
+void expand_cond_len_binop (rtx_code, rtx *);
#endif
bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
bool, void (*)(rtx *, rtx));
@@ -919,6 +919,45 @@ emit_vlmax_masked_mu_insn (unsigned icode, int op_num, rtx *ops)
e.emit_insn ((enum insn_code) icode, ops);
}
+/* Emit ICODE on OPS as a non-VLMAX instruction whose vector length is AVL,
+   using the tail-undisturbed and mask-any policies.  OPS[0] is the
+   destination; OP_NUM is passed through to the expander.  */
+static void
+emit_nonvlmax_tu_insn (unsigned icode, int op_num, rtx *ops, rtx avl)
+{
+  machine_mode vmode = GET_MODE (ops[0]);
+  machine_mode pred_mode = get_mask_mode (vmode).require ();
+  insn_expander<RVV_INSN_OPERANDS_MAX> expander (
+    /*OP_NUM*/ op_num, /*HAS_DEST_P*/ true, /*FULLY_UNMASKED_P*/ false,
+    /*USE_REAL_MERGE_P*/ true, /*HAS_AVL_P*/ true, /*VLMAX_P*/ false,
+    vmode, pred_mode);
+
+  expander.set_policy (TAIL_UNDISTURBED);
+  expander.set_policy (MASK_ANY);
+  expander.set_vl (avl);
+  expander.emit_insn ((enum insn_code) icode, ops);
+}
+
+/* Emit ICODE on OPS as a non-VLMAX instruction whose vector length is AVL,
+   using the tail-undisturbed and mask-any policies and the FRM_DYN
+   rounding mode.  OPS[0] is the destination; OP_NUM is passed through.  */
+static void
+emit_nonvlmax_fp_tu_insn (unsigned icode, int op_num, rtx *ops, rtx avl)
+{
+  machine_mode vmode = GET_MODE (ops[0]);
+  machine_mode pred_mode = get_mask_mode (vmode).require ();
+  insn_expander<RVV_INSN_OPERANDS_MAX> expander (
+    /*OP_NUM*/ op_num, /*HAS_DEST_P*/ true, /*FULLY_UNMASKED_P*/ false,
+    /*USE_REAL_MERGE_P*/ true, /*HAS_AVL_P*/ true, /*VLMAX_P*/ false,
+    vmode, pred_mode);
+
+  expander.set_policy (TAIL_UNDISTURBED);
+  expander.set_policy (MASK_ANY);
+  expander.set_rounding_mode (FRM_DYN);
+  expander.set_vl (avl);
+  expander.emit_insn ((enum insn_code) icode, ops);
+}
+
/* Emit vmv.s.x instruction. */
void
@@ -2968,4 +3007,44 @@ expand_load_store (rtx *ops, bool is_load)
}
}
+/* Return true if CODE operating on MODE is a floating-point operation that
+   needs a rounding mode (FRM): any float-mode code except SMIN and SMAX.  */
+static bool
+needs_fp_rounding (rtx_code code, machine_mode mode)
+{
+  const bool is_min_or_max = (code == SMIN || code == SMAX);
+  return FLOAT_MODE_P (mode) && !is_min_or_max;
+}
+
+/* Expand COND_LEN_* for binary operation CODE.  OPS holds, in order, the
+   destination, mask, first source, second source, merge value and length.
+   Only an all-ones mask is supported; any other mask is unreachable.  */
+void
+expand_cond_len_binop (rtx_code code, rtx *ops)
+{
+  rtx dest = ops[0];
+  rtx mask = ops[1];
+  rtx src1 = ops[2];
+  rtx src2 = ops[3];
+  rtx merge = ops[4];
+  rtx len = ops[5];
+  machine_mode mode = GET_MODE (dest);
+  machine_mode mask_mode = GET_MODE (mask);
+  bool is_dummy_mask = rtx_equal_p (mask, CONSTM1_RTX (mask_mode));
+
+  if (is_dummy_mask)
+    {
+      /* Use TU, MASK ANY policy.  */
+      rtx pred_ops[] = {dest, mask, merge, src1, src2};
+      insn_code icode = code_for_pred (code, mode);
+      if (needs_fp_rounding (code, mode))
+        emit_nonvlmax_fp_tu_insn (icode, RVV_BINOP_TU, pred_ops, len);
+      else
+        emit_nonvlmax_tu_insn (icode, RVV_BINOP_TU, pred_ops, len);
+    }
+  else
+    /* FIXME: Enable this case when we support it in the middle-end.  */
+    gcc_unreachable ();
+}
+
} // namespace riscv_vector
@@ -7832,6 +7832,24 @@ riscv_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
return false;
}
+/* Implement TARGET_PREFERRED_ELSE_VALUE.  For binary operations, prefer
+   the first arithmetic operand as the else value when the else value
+   doesn't matter, since that exactly matches the RVV destructive merging
+   form.  For ternary operations we could pick either the first operand
+   (VMADD-like instructions) or the last operand (VMACC-like instructions);
+   the latter seems more natural.
+
+   TODO: This return value is not ideal for RVV because it makes the VSETVL
+   pass use MU or TU.  In the future we will support an undefined else value
+   so that the VSETVL pass can use TA/MA.  */
+static tree
+riscv_preferred_else_value (unsigned, tree, unsigned int nops, tree *ops)
+{
+  if (nops == 3)
+    return ops[2];
+  return ops[0];
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -8133,6 +8151,9 @@ riscv_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST riscv_vectorize_vec_perm_const
+#undef TARGET_PREFERRED_ELSE_VALUE
+#define TARGET_PREFERRED_ELSE_VALUE riscv_preferred_else_value
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-riscv.h"
new file mode 100644
@@ -0,0 +1,4 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=scalable" } */
+
+#include "vadd-run.c"
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+
+#include "vadd-template.h"
+
+/* { dg-final { scan-assembler-times {\tvadd\.vv} 16 } } */
+/* { dg-final { scan-assembler-times {\tvadd\.vi} 8 } } */
+/* { dg-final { scan-assembler-times {\tvfadd\.vv} 7 } } */
+/* There are 2 MINUS operations. */
+/* { dg-final { scan-assembler-times {\tvfsub\.vv} 2 } } */
+
+/* { dg-final { scan-tree-dump-times "\.COND_LEN_ADD" 7 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.COND_LEN_SUB" 2 "optimized" } } */
new file mode 100644
@@ -0,0 +1,13 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+
+#include "vadd-template.h"
+
+/* { dg-final { scan-assembler-times {\tvadd\.vv} 16 } } */
+/* { dg-final { scan-assembler-times {\tvadd\.vi} 8 } } */
+/* { dg-final { scan-assembler-times {\tvfadd\.vv} 7 } } */
+/* There are 2 MINUS operations. */
+/* { dg-final { scan-assembler-times {\tvfsub\.vv} 2 } } */
+
+/* { dg-final { scan-tree-dump-times "\.COND_LEN_ADD" 7 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.COND_LEN_SUB" 2 "optimized" } } */
new file mode 100644
@@ -0,0 +1,4 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=scalable" } */
+
+#include "vdiv-run.c"
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+
+#include "vdiv-template.h"
+
+/* { dg-final { scan-assembler-times {\tvdiv\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {\tvdivu\.vv} 8 } } */
+
+/* { dg-final { scan-assembler-times {\tvfdiv\.vv} 6 } } */
+
+/* { dg-final { scan-tree-dump-times "\.COND_LEN_DIV" 16 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.COND_LEN_RDIV" 6 "optimized" } } */
@@ -1,15 +1,14 @@
/* { dg-do compile } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -ffast-math -fdump-tree-optimized-details" } */
#include "vdiv-template.h"
-/* Currently we use an epilogue loop which also contains vdivs. Therefore we
- expect 14 vdiv[u]s instead of 8. */
-
-/* { dg-final { scan-assembler-times {\tvdiv\.vv} 14 } } */
-/* { dg-final { scan-assembler-times {\tvdivu\.vv} 14 } } */
+/* { dg-final { scan-assembler-times {\tvdiv\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {\tvdivu\.vv} 8 } } */
/* Division by constant is done by calculating a reciprocal and
then multiplying. Hence we do not expect 6 vfdivs. */
/* { dg-final { scan-assembler-times {\tvfdiv\.vv} 3 } } */
/* { dg-final { scan-assembler-times {\tvfmul\.vv} 3 } } */
+
+/* { dg-final { scan-tree-dump-times "\.COND_LEN_DIV" 16 "optimized" } } */
new file mode 100644
@@ -0,0 +1,12 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+
+#include "vdiv-template.h"
+
+/* { dg-final { scan-assembler-times {\tvdiv\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {\tvdivu\.vv} 8 } } */
+
+/* { dg-final { scan-assembler-times {\tvfdiv\.vv} 6 } } */
+
+/* { dg-final { scan-tree-dump-times "\.COND_LEN_DIV" 16 "optimized" } } */
+/* { dg-final { scan-tree-dump-times "\.COND_LEN_RDIV" 6 "optimized" } } */
@@ -1,15 +1,14 @@
/* { dg-do compile } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax -ffast-math" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax -ffast-math -fdump-tree-optimized-details" } */
#include "vdiv-template.h"
-/* Currently we use an epilogue loop which also contains vdivs. Therefore we
- expect 14 vdiv[u]s instead of 8. */
-
-/* { dg-final { scan-assembler-times {\tvdiv\.vv} 14 } } */
-/* { dg-final { scan-assembler-times {\tvdivu\.vv} 14 } } */
+/* { dg-final { scan-assembler-times {\tvdiv\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {\tvdivu\.vv} 8 } } */
/* Division by constant is done by calculating a reciprocal and
then multiplying. Hence we do not expect 6 vfdivs. */
/* { dg-final { scan-assembler-times {\tvfdiv\.vv} 3 } } */
/* { dg-final { scan-assembler-times {\tvfmul\.vv} 3 } } */
+
+/* { dg-final { scan-tree-dump-times "\.COND_LEN_DIV" 16 "optimized" } } */
new file mode 100644
@@ -0,0 +1,4 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=scalable" } */
+
+#include "vmul-run.c"
new file mode 100644
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+
+#include "vmul-template.h"
+
+/* { dg-final { scan-assembler-times {\tvmul\.vv} 16 } } */
+/* { dg-final { scan-assembler-times {\tvfmul\.vv} 6 } } */
+/* { dg-final { scan-tree-dump-times "\.COND_LEN_MUL" 6 "optimized" } } */
new file mode 100644
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+
+#include "vmul-template.h"
+
+/* { dg-final { scan-assembler-times {\tvmul\.vv} 16 } } */
+/* { dg-final { scan-assembler-times {\tvfmul\.vv} 6 } } */
+/* { dg-final { scan-tree-dump-times "\.COND_LEN_MUL" 6 "optimized" } } */
@@ -1,9 +1,7 @@
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv -mabi=ilp32d --param=riscv-autovec-preference=fixed-vlmax -fdump-tree-optimized-details" } */
#include "vrem-template.h"
-/* Currently we use an epilogue loop which also contains vrems. Therefore we
- expect 14 vrem[u]s instead of 8. */
-
-/* { dg-final { scan-assembler-times {\tvrem\.vv} 14 } } */
-/* { dg-final { scan-assembler-times {\tvremu\.vv} 14 } } */
+/* { dg-final { scan-assembler-times {\tvrem\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {\tvremu\.vv} 8 } } */
+/* { dg-final { scan-tree-dump-times "\.COND_LEN_MOD" 16 "optimized" } } */
@@ -1,10 +1,8 @@
/* { dg-do compile } */
-/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax" } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv -mabi=lp64d --param=riscv-autovec-preference=fixed-vlmax -fdump-tree-optimized-details" } */
#include "vrem-template.h"
-/* Currently we use an epilogue loop which also contains vrems. Therefore we
- expect 14 vrem[u]s instead of 8. */
-
-/* { dg-final { scan-assembler-times {\tvrem\.vv} 14 } } */
-/* { dg-final { scan-assembler-times {\tvremu\.vv} 14 } } */
+/* { dg-final { scan-assembler-times {\tvrem\.vv} 8 } } */
+/* { dg-final { scan-assembler-times {\tvremu\.vv} 8 } } */
+/* { dg-final { scan-tree-dump-times "\.COND_LEN_MOD" 16 "optimized" } } */
new file mode 100644
@@ -0,0 +1,4 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model --param=riscv-autovec-preference=scalable" } */
+
+#include "vsub-run.c"
new file mode 100644
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+
+#include "vsub-template.h"
+
+/* { dg-final { scan-assembler-times {\tvsub\.vv} 16 } } */
+/* { dg-final { scan-assembler-times {\tvrsub\.vi} 16 } } */
+
+/* { dg-final { scan-assembler-times {\tvfsub\.vv} 12 } } */
+/* { dg-final { scan-tree-dump-times "\.COND_LEN_SUB" 12 "optimized" } } */
+
+/* Do not expect vfrsub for now, because we do not properly
+ handle vop.vx and vfop.vf yet. */
+/* { dg-final { scan-assembler-times {\tvfrsub\.vv} 0 } } */
new file mode 100644
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-std=c99 -fno-vect-cost-model -march=rv64gcv_zvfh -mabi=lp64d --param=riscv-autovec-preference=scalable -fdump-tree-optimized-details" } */
+
+#include "vsub-template.h"
+
+/* { dg-final { scan-assembler-times {\tvsub\.vv} 16 } } */
+/* { dg-final { scan-assembler-times {\tvrsub\.vi} 16 } } */
+
+/* { dg-final { scan-assembler-times {\tvfsub\.vv} 12 } } */
+/* { dg-final { scan-tree-dump-times "\.COND_LEN_SUB" 12 "optimized" } } */
+
+/* Do not expect vfrsub for now, because we do not properly
+ handle vop.vx and vfop.vf yet. */
+/* { dg-final { scan-assembler-times {\tvfrsub\.vv} 0 } } */