@@ -1119,6 +1119,78 @@
}
[(set_attr "type" "vfwmuladd")])
+;; Combine mask_extend + vredsum to mask_vwredsum[u].  Only matches when
+;; the merge of mask_extend is vector const 0, so masked-off lanes add 0.
+(define_insn_and_split "*cond_widen_reduc_plus_scal_<mode>"
+ [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
+ (unspec:<V_DOUBLE_EXTEND_VEL> [
+ (if_then_else:<V_DOUBLE_EXTEND>
+ (match_operand:<VM> 1 "register_operand")
+ (any_extend:<V_DOUBLE_EXTEND>
+ (match_operand:VI_QHS_NO_M8 2 "register_operand"))
+ (if_then_else:<V_DOUBLE_EXTEND>
+ (unspec:<VM> [
+ (match_operand:<VM> 3 "vector_all_trues_mask_operand")
+ (match_operand 6 "vector_length_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (match_operand 9 "const_1_or_2_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)
+ ] UNSPEC_VPREDICATE)
+ (match_operand:<V_DOUBLE_EXTEND> 5 "vector_const_0_operand")
+ (match_operand:<V_DOUBLE_EXTEND> 4 "vector_merge_operand")))
+ ] UNSPEC_REDUC_SUM))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ rtx ops[] = {operands[0], operands[2], operands[1],
+ gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)};
+ riscv_vector::expand_reduction (<WREDUC_UNSPEC>,
+ riscv_vector::REDUCE_OP_M,
+ ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
+ DONE;
+}
+[(set_attr "type" "vector")])
+
+;; Combine mask_extend + unordered vfredusum to mask_vfwredusum.  Only matches
+;; when the merge of mask_extend is vector const 0.
+(define_insn_and_split "*cond_widen_reduc_plus_scal_<mode>"
+ [(set (match_operand:<V_DOUBLE_EXTEND_VEL> 0 "register_operand")
+ (unspec:<V_DOUBLE_EXTEND_VEL> [
+ (if_then_else:<V_DOUBLE_EXTEND>
+ (match_operand:<VM> 1 "register_operand")
+ (float_extend:<V_DOUBLE_EXTEND>
+ (match_operand:VF_HS_NO_M8 2 "register_operand"))
+ (if_then_else:<V_DOUBLE_EXTEND>
+ (unspec:<VM> [
+ (match_operand:<VM> 3 "vector_all_trues_mask_operand")
+ (match_operand 6 "vector_length_operand")
+ (match_operand 7 "const_int_operand")
+ (match_operand 8 "const_int_operand")
+ (match_operand 9 "const_1_or_2_operand")
+ (reg:SI VL_REGNUM)
+ (reg:SI VTYPE_REGNUM)
+ ] UNSPEC_VPREDICATE)
+ (match_operand:<V_DOUBLE_EXTEND> 5 "vector_const_0_operand")
+ (match_operand:<V_DOUBLE_EXTEND> 4 "vector_merge_operand")))
+ ] UNSPEC_REDUC_SUM_UNORDERED))]
+ "TARGET_VECTOR && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ rtx ops[] = {operands[0], operands[2], operands[1],
+ gen_int_mode (GET_MODE_NUNITS (<MODE>mode), Pmode)};
+ riscv_vector::expand_reduction (UNSPEC_WREDUC_SUM_UNORDERED,
+ riscv_vector::REDUCE_OP_M_FRM_DYN,
+ ops, CONST0_RTX (<V_DOUBLE_EXTEND_VEL>mode));
+ DONE;
+}
+[(set_attr "type" "vector")])
+
;; =============================================================================
;; Misc combine patterns
;; =============================================================================
@@ -337,6 +337,7 @@ enum insn_type : unsigned int
/* For vreduce, no mask policy operand. */
REDUCE_OP = __NORMAL_OP_TA | BINARY_OP_P | VTYPE_MODE_FROM_OP1_P,
+ REDUCE_OP_M = __MASK_OP_TA | BINARY_OP_P | VTYPE_MODE_FROM_OP1_P,
REDUCE_OP_FRM_DYN = REDUCE_OP | FRM_DYN_P | VTYPE_MODE_FROM_OP1_P,
REDUCE_OP_M_FRM_DYN
= __MASK_OP_TA | BINARY_OP_P | FRM_DYN_P | VTYPE_MODE_FROM_OP1_P,
@@ -366,8 +367,9 @@ enum vlmul_type
enum avl_type
{
- NONVLMAX,
- VLMAX,
+ NONVLMAX = 0, /* Not a VLMAX request; length comes from the vl operand.  */
+ VLMAX = 1, /* VLMAX request whose length was not made constant.  */
+ VLS = 2, /* VLMAX request lowered to the mode's constant NUNITS.  */
};
/* Routines implemented in riscv-vector-builtins.cc. */
void init_builtins (void);
@@ -284,6 +284,7 @@ public:
/* Add vl operand. */
rtx len = m_vl_op;
+ bool vls_p = false;
if (m_vlmax_p)
{
if (riscv_v_ext_vls_mode_p (vtype_mode))
@@ -294,7 +295,7 @@ public:
len = gen_int_mode (nunits, Pmode);
if (!satisfies_constraint_K (len))
len = force_reg (Pmode, len);
- m_vlmax_p = false;
+ vls_p = true;
}
else if (const_vlmax_p (vtype_mode))
{
@@ -302,7 +303,7 @@ public:
the vsetvli to obtain the value of vlmax. */
poly_uint64 nunits = GET_MODE_NUNITS (vtype_mode);
len = gen_int_mode (nunits, Pmode);
- m_vlmax_p = false;
+ vls_p = true;
}
else if (can_create_pseudo_p ())
{
@@ -318,7 +319,9 @@ public:
add_policy_operand ();
/* Add avl_type operand. */
- add_avl_type_operand (m_vlmax_p ? avl_type::VLMAX : avl_type::NONVLMAX);
+ add_avl_type_operand (
+ vls_p ? avl_type::VLS
+ : (m_vlmax_p ? avl_type::VLMAX : avl_type::NONVLMAX));
/* Add rounding mode operand. */
if (m_insn_flags & FRM_DYN_P)
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv_zvfh_zvl128b -mabi=lp64d --param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m2 -fno-vect-cost-model -ffast-math" } */
+#include <stdint-gcc.h>
+/* Define reduc_TYPE1_TYPE2: sum a[i] into a TYPE1 for i < N where pred[i] is nonzero.  */
+#define TEST_TYPE(TYPE1, TYPE2, N) \
+ __attribute__ ((noipa)) \
+ TYPE1 reduc_##TYPE1##_##TYPE2 (TYPE2 *restrict a, TYPE2 *restrict pred) \
+ { \
+ TYPE1 sum = 0; \
+ for (int i = 0; i < N; i += 1) \
+ if (pred[i]) \
+ sum += a[i]; \
+ return sum; \
+ }
+/* Apply TEST to each (wide, narrow, N) case: 3 signed, 3 unsigned, 2 floating-point.  */
+#define TEST_ALL(TEST) \
+ TEST (int16_t, int8_t, 16) \
+ TEST (int32_t, int16_t, 8) \
+ TEST (int64_t, int32_t, 4) \
+ TEST (uint16_t, uint8_t, 16) \
+ TEST (uint32_t, uint16_t, 8) \
+ TEST (uint64_t, uint32_t, 4) \
+ TEST (float, _Float16, 8) \
+ TEST (double, float, 4)
+
+TEST_ALL (TEST_TYPE)
+
+/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvwredsum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
+/* { dg-final { scan-assembler-times {\tvwredsumu\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
new file mode 100644
@@ -0,0 +1,30 @@
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv64gcv_zvfh_zvl128b -mabi=lp64d --param riscv-autovec-preference=scalable --param riscv-autovec-lmul=m2 -fno-vect-cost-model -ffast-math" } */
+#include <stdint-gcc.h>
+/* Define reduc_TYPE1_TYPE2: sum a[i] into a TYPE1 for i < N where pred[i] is nonzero.  */
+#define TEST_TYPE(TYPE1, TYPE2, N) \
+ __attribute__ ((noipa)) \
+ TYPE1 reduc_##TYPE1##_##TYPE2 (TYPE2 *restrict a, TYPE2 *restrict pred) \
+ { \
+ TYPE1 sum = 0; \
+ for (int i = 0; i < N; i += 1) \
+ if (pred[i]) \
+ sum += a[i]; \
+ return sum; \
+ }
+/* Apply TEST to each (wide, narrow, N) case: 3 signed, 3 unsigned, 2 floating-point.  */
+#define TEST_ALL(TEST) \
+ TEST (int16_t, int8_t, 16) \
+ TEST (int32_t, int16_t, 8) \
+ TEST (int64_t, int32_t, 4) \
+ TEST (uint16_t, uint8_t, 16) \
+ TEST (uint32_t, uint16_t, 8) \
+ TEST (uint64_t, uint32_t, 4) \
+ TEST (float, _Float16, 8) \
+ TEST (double, float, 4)
+
+TEST_ALL (TEST_TYPE)
+
+/* { dg-final { scan-assembler-times {\tvfwredusum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 2 } } */
+/* { dg-final { scan-assembler-times {\tvwredsum\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
+/* { dg-final { scan-assembler-times {\tvwredsumu\.vs\tv[0-9]+,v[0-9]+,v[0-9]+,v0\.t} 3 } } */
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param riscv-autovec-preference=fixed-vlmax --param riscv-autovec-lmul=m2 -fno-vect-cost-model -ffast-math" } */
+
+#include "cond_widen_reduc-1.c"
+/* Fill a[] and pred[], accumulate the expected sum in r, and abort if reduc_TYPE1_TYPE2 disagrees.  */
+#define RUN(TYPE1, TYPE2, N) \
+ { \
+ TYPE2 a[N]; \
+ TYPE2 pred[N]; \
+ TYPE1 r = 0; \
+ for (int i = 0; i < N; i++) \
+ { \
+ a[i] = (i * 0.1) * (i & 1 ? 1 : -1); /* NOTE(review): <= 0 for even i; confirm this is intended for unsigned TYPE2.  */ \
+ pred[i] = i % 3; \
+ if (pred[i]) \
+ r += a[i]; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ if (r != reduc_##TYPE1##_##TYPE2 (a, pred)) \
+ __builtin_abort (); \
+ }
+/* Execute RUN for every TEST_ALL case; main itself carries the optimize (1) attribute.  */
+int __attribute__ ((optimize (1)))
+main ()
+{
+ TEST_ALL (RUN)
+ return 0;
+}
new file mode 100644
@@ -0,0 +1,28 @@
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param riscv-autovec-preference=scalable --param riscv-autovec-lmul=m2 -fno-vect-cost-model -ffast-math" } */
+
+#include "cond_widen_reduc-2.c"
+/* Fill a[] and pred[], accumulate the expected sum in r, and abort if reduc_TYPE1_TYPE2 disagrees.  */
+#define RUN(TYPE1, TYPE2, N) \
+ { \
+ TYPE2 a[N]; \
+ TYPE2 pred[N]; \
+ TYPE1 r = 0; \
+ for (int i = 0; i < N; i++) \
+ { \
+ a[i] = (i * 0.1) * (i & 1 ? 1 : -1); /* NOTE(review): <= 0 for even i; confirm this is intended for unsigned TYPE2.  */ \
+ pred[i] = i % 3; \
+ if (pred[i]) \
+ r += a[i]; \
+ asm volatile ("" ::: "memory"); \
+ } \
+ if (r != reduc_##TYPE1##_##TYPE2 (a, pred)) \
+ __builtin_abort (); \
+ }
+/* Execute RUN for every TEST_ALL case; main itself carries the optimize (1) attribute.  */
+int __attribute__ ((optimize (1)))
+main ()
+{
+ TEST_ALL (RUN)
+ return 0;
+}