diff mbox series

RISC-V: Support in-order floating-point reduction

Message ID 20230720073406.239379-1-juzhe.zhong@rivai.ai
State New
Headers show
Series RISC-V: Support in-order floating-point reduction | expand

Commit Message

juzhe.zhong@rivai.ai July 20, 2023, 7:34 a.m. UTC
This patch depends on:
https://gcc.gnu.org/pipermail/gcc-patches/2023-July/624995.html

Consider the following case:
float foo (float *__restrict a, int n)
{
  float result = 1.0;
  for (int i = 0; i < n; i++)
   result += a[i];
  return result;
}

Compile with **NO** -ffast-math:

Before this patch:
<source>:4:21: missed: couldn't vectorize loop
<source>:1:7: missed: not vectorized: relevant phi not supported: result_14 = PHI <result_11(6), 1.0e+0(5)>

After this patch:
foo:
	lui	a5,%hi(.LC0)
	flw	fa0,%lo(.LC0)(a5)
	ble	a1,zero,.L4
.L3:
	vsetvli	a5,a1,e32,m1,ta,ma
	vle32.v	v1,0(a0)
	slli	a4,a5,2
	vsetivli	zero,1,e32,m1,ta,ma
	sub	a1,a1,a5
	vfmv.s.f	v2,fa0
	add	a0,a0,a4
	vsetvli	zero,a5,e32,m1,ta,ma
	vfredosum.vs	v1,v1,v2     ----------> FOLD_LEFT_PLUS
	vfmv.f.s	fa0,v1
	bne	a1,zero,.L3
	ret
.L4:
	ret

gcc/ChangeLog:

	* config/riscv/autovec.md (fold_left_plus_<mode>): New pattern.
	(mask_len_fold_left_plus_<mode>): Ditto.
	* config/riscv/riscv-protos.h (enum insn_type): New enum.
	(enum reduction_type): Ditto.
	(expand_reduction): Add in-order reduction.
	* config/riscv/riscv-v.cc (emit_nonvlmax_fp_reduction_insn): New function.
	(expand_reduction): Add in-order reduction.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/autovec/reduc/reduc_strict-1.c: New test.
	* gcc.target/riscv/rvv/autovec/reduc/reduc_strict-2.c: New test.
	* gcc.target/riscv/rvv/autovec/reduc/reduc_strict-3.c: New test.
	* gcc.target/riscv/rvv/autovec/reduc/reduc_strict-4.c: New test.
	* gcc.target/riscv/rvv/autovec/reduc/reduc_strict-5.c: New test.
	* gcc.target/riscv/rvv/autovec/reduc/reduc_strict-6.c: New test.
	* gcc.target/riscv/rvv/autovec/reduc/reduc_strict-7.c: New test.
	* gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-1.c: New test.
	* gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-2.c: New test.

---
 gcc/config/riscv/autovec.md                   | 39 ++++++++++++++++
 gcc/config/riscv/riscv-protos.h               | 11 ++++-
 gcc/config/riscv/riscv-v.cc                   | 45 ++++++++++++++++---
 .../riscv/rvv/autovec/reduc/reduc_strict-1.c  | 28 ++++++++++++
 .../riscv/rvv/autovec/reduc/reduc_strict-2.c  | 26 +++++++++++
 .../riscv/rvv/autovec/reduc/reduc_strict-3.c  | 18 ++++++++
 .../riscv/rvv/autovec/reduc/reduc_strict-4.c  | 24 ++++++++++
 .../riscv/rvv/autovec/reduc/reduc_strict-5.c  | 28 ++++++++++++
 .../riscv/rvv/autovec/reduc/reduc_strict-6.c  | 18 ++++++++
 .../riscv/rvv/autovec/reduc/reduc_strict-7.c  | 21 +++++++++
 .../rvv/autovec/reduc/reduc_strict_run-1.c    | 29 ++++++++++++
 .../rvv/autovec/reduc/reduc_strict_run-2.c    | 31 +++++++++++++
 12 files changed, 311 insertions(+), 7 deletions(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-2.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-3.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-4.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-5.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-6.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-7.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-1.c
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-2.c

Comments

Robin Dapp July 20, 2023, 7:41 a.m. UTC | #1
> +enum reduction_type
> +{
> +  UNORDERED_REDUDUCTION,
> +  FOLD_LEFT_REDUDUCTION,
> +  MASK_LEN_FOLD_LEFT_REDUDUCTION,
> +};

There are redundant 'DU's here ;)
Wouldn't it be sufficient to have an enum

enum reduction_type
{
  UNORDERED,
  FOLD_LEFT,
  MASK_LEN_FOLD_LEFT,
};
?

Regards
 Robin
juzhe.zhong@rivai.ai July 20, 2023, 7:42 a.m. UTC | #2
The UNORDERED enum will cause ICE since we have UNORDERED in rtx_code.

Could you give me another enum name?



juzhe.zhong@rivai.ai
 
From: Robin Dapp
Date: 2023-07-20 15:41
To: Juzhe-Zhong; gcc-patches
CC: rdapp.gcc; kito.cheng; kito.cheng; jeffreyalaw
Subject: Re: [PATCH] RISC-V: Support in-order floating-point reduction
> +enum reduction_type
> +{
> +  UNORDERED_REDUDUCTION,
> +  FOLD_LEFT_REDUDUCTION,
> +  MASK_LEN_FOLD_LEFT_REDUDUCTION,
> +};
 
There are redundant 'DU's here ;)
Wouldn't it be sufficient to have an enum
 
enum reduction_type
{
  UNORDERED,
  FOLD_LEFT,
  MASK_LEN_FOLD_LEFT,
};
?
 
Regards
Robin
Kito Cheng July 20, 2023, 7:42 a.m. UTC | #3
It seems there is a potential vsetvli optimization opportunity in the example?

> After this patch:
> foo:
>         lui     a5,%hi(.LC0)
>         flw     fa0,%lo(.LC0)(a5)
>         ble     a1,zero,.L4
> .L3:
>         vsetvli a5,a1,e32,m1,ta,ma
>         vle32.v v1,0(a0)
>         slli    a4,a5,2
>         vsetivli        zero,1,e32,m1,ta,ma

This could just use "vsetvli a5,a1,e32,m1,ta,ma"

>         sub     a1,a1,a5
>         vfmv.s.f        v2,fa0
>         add     a0,a0,a4
>         vsetvli zero,a5,e32,m1,ta,ma

And then this can be removed too.

>         vfredosum.vs    v1,v1,v2
>         vfmv.f.s        fa0,v1
>         bne     a1,zero,.L3
>         ret
> .L4:
>         ret
juzhe.zhong@rivai.ai July 20, 2023, 7:53 a.m. UTC | #4
Oh, Yes. 
It can be easily addressed by this:
emit_scalar_move_insn (code_for_pred_broadcast (m1_mode), scalar_move_ops);

This patch emits the scalar move insn with AVL = 1 for all reductions. This can be easily addressed: when we recognize that it is a mask_len_fold_left_plus reduction,
we assign the AVL to the scalar move insn.



juzhe.zhong@rivai.ai
 
From: Kito Cheng
Date: 2023-07-20 15:42
To: Juzhe-Zhong
CC: gcc-patches; kito.cheng; jeffreyalaw; rdapp.gcc
Subject: Re: [PATCH] RISC-V: Support in-order floating-point reduction
Seems like there is a potential vsetvli optimization chance in the example?
 
> After this patch:
> foo:
>         lui     a5,%hi(.LC0)
>         flw     fa0,%lo(.LC0)(a5)
>         ble     a1,zero,.L4
> .L3:
>         vsetvli a5,a1,e32,m1,ta,ma
>         vle32.v v1,0(a0)
>         slli    a4,a5,2
>         vsetivli        zero,1,e32,m1,ta,ma
 
This could just use "vsetvli a5,a1,e32,m1,ta,ma"
 
>         sub     a1,a1,a5
>         vfmv.s.f        v2,fa0
>         add     a0,a0,a4
>         vsetvli zero,a5,e32,m1,ta,ma
 
And then this can be removed too.
 
>         vfredosum.vs    v1,v1,v2
>         vfmv.f.s        fa0,v1
>         bne     a1,zero,.L3
>         ret
> .L4:
>         ret
Robin Dapp July 20, 2023, 7:57 a.m. UTC | #5
> The UNORDERED enum will cause ICE since we have UNORDERED in rtx_code.
> 
> Could you give me another enum name?

I would have expected it to work when it's namespaced.

Regards
 Robin
juzhe.zhong@rivai.ai July 20, 2023, 7:59 a.m. UTC | #6
I have no idea; the ICE just shows up when running the regression tests:

during RTL pass: expand
auto.c: In function 'test_int32_t_float_unordered_var':
auto.c:24:3: internal compiler error: in expand_vec_cmp_float, at config/riscv/riscv-v.cc:2564
   24 |   test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest,   \
      |   ^~~~~
auto.c:41:3: note: in expansion of macro 'TEST_LOOP'
   41 |   TEST_LOOP (int32_t, float, CMP) \
      |   ^~~~~~~~~
auto.c:55:1: note: in expansion of macro 'TEST_CMP'
   55 | TEST_CMP (unordered)
      | ^~~~~~~~
0x1c8af0d riscv_vector::expand_vec_cmp_float(rtx_def*, rtx_code, rtx_def*, rtx_def*, bool)
        ../../../riscv-gcc/gcc/config/riscv/riscv-v.cc:2564
0x233d200 gen_vec_cmprvvm1sfrvvmf32bi(rtx_def*, rtx_def*, rtx_def*, rtx_def*)
        ../../../riscv-gcc/gcc/config/riscv/autovec.md:559
0x14c4582 rtx_insn* insn_gen_fn::operator()<rtx_def*, rtx_def*, rtx_def*, rtx_def*>(rtx_def*, rtx_def*, rtx_def*, rtx_def*) const
        ../../../riscv-gcc/gcc/recog.h:407
0x14c3c02 maybe_gen_insn(insn_code, unsigned int, expand_operand*)
        ../../../riscv-gcc/gcc/optabs.cc:8197
0x14c4097 maybe_expand_insn(insn_code, unsigned int, expand_operand*)
        ../../../riscv-gcc/gcc/optabs.cc:8237
0x14c412b expand_insn(insn_code, unsigned int, expand_operand*)
        ../../../riscv-gcc/gcc/optabs.cc:8268
0x14bfc3e expand_vec_cmp_expr(tree_node*, tree_node*, rtx_def*)
        ../../../riscv-gcc/gcc/optabs.cc:6692
0x1124e4a do_store_flag
        ../../../riscv-gcc/gcc/expr.cc:13060
0x1116b10 expand_expr_real_2(separate_ops*, rtx_def*, machine_mode, expand_modifier)
        ../../../riscv-gcc/gcc/expr.cc:10265
0x1119405 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
        ../../../riscv-gcc/gcc/expr.cc:10810
0x1110fb0 expand_expr_real(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
        ../../../riscv-gcc/gcc/expr.cc:9015
0xf2e973 expand_normal(tree_node*)
        ../../../riscv-gcc/gcc/expr.h:316
0x12bb060 expand_vec_cond_mask_optab_fn
        ../../../riscv-gcc/gcc/internal-fn.cc:3059
0x12c27ca expand_VCOND_MASK
        ../../../riscv-gcc/gcc/internal-fn.def:184
0x12c52a5 expand_internal_call(internal_fn, gcall*)
        ../../../riscv-gcc/gcc/internal-fn.cc:4792
0x12c52d0 expand_internal_call(gcall*)
        ../../../riscv-gcc/gcc/internal-fn.cc:4800
0xf5e4c1 expand_call_stmt
        ../../../riscv-gcc/gcc/cfgexpand.cc:2737
0xf62871 expand_gimple_stmt_1
        ../../../riscv-gcc/gcc/cfgexpand.cc:3880
0xf62f0f expand_gimple_stmt
        ../../../riscv-gcc/gcc/cfgexpand.cc:4044
0xf6b8a9 expand_gimple_basic_block
        ../../../riscv-gcc/gcc/cfgexpand.cc:6096

This ICE happens when compiling vcond.cc tests


juzhe.zhong@rivai.ai
 
From: Robin Dapp
Date: 2023-07-20 15:57
To: juzhe.zhong@rivai.ai; gcc-patches
CC: rdapp.gcc; kito.cheng; Kito.cheng; jeffreyalaw
Subject: Re: [PATCH] RISC-V: Support in-order floating-point reduction
> The UNORDERED enum will cause ICE since we have UNORDERED in rtx_code.
> 
> Could you give me another enum name?
 
I would have expected it to work when it's namespaced.
 
Regards
Robin
Kito Cheng July 20, 2023, 8:03 a.m. UTC | #7
It seems that, because you have `using namespace riscv_vector;`, the
UNORDERED in expand_vec_cmp_float resolved to reduction_type::UNORDERED.

Hmmm, maybe enum class?

enum class reduction_type
{
  UNORDERED,
  FOLD_LEFT,
  MASK_LEN_FOLD_LEFT,
};

and you need to use it like this: reduction_type::UNORDERED

On Thu, Jul 20, 2023 at 3:59 PM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> I have no ideal, just ICE comes when running regression:
>
> during RTL pass: expand
> auto.c: In function 'test_int32_t_float_unordered_var':
> auto.c:24:3: internal compiler error: in expand_vec_cmp_float, at config/riscv/riscv-v.cc:2564
>    24 |   test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest,   \
>       |   ^~~~~
> auto.c:41:3: note: in expansion of macro 'TEST_LOOP'
>    41 |   TEST_LOOP (int32_t, float, CMP) \
>       |   ^~~~~~~~~
> auto.c:55:1: note: in expansion of macro 'TEST_CMP'
>    55 | TEST_CMP (unordered)
>       | ^~~~~~~~
> 0x1c8af0d riscv_vector::expand_vec_cmp_float(rtx_def*, rtx_code, rtx_def*, rtx_def*, bool)
>         ../../../riscv-gcc/gcc/config/riscv/riscv-v.cc:2564
> 0x233d200 gen_vec_cmprvvm1sfrvvmf32bi(rtx_def*, rtx_def*, rtx_def*, rtx_def*)
>         ../../../riscv-gcc/gcc/config/riscv/autovec.md:559
> 0x14c4582 rtx_insn* insn_gen_fn::operator()<rtx_def*, rtx_def*, rtx_def*, rtx_def*>(rtx_def*, rtx_def*, rtx_def*, rtx_def*) const
>         ../../../riscv-gcc/gcc/recog.h:407
> 0x14c3c02 maybe_gen_insn(insn_code, unsigned int, expand_operand*)
>         ../../../riscv-gcc/gcc/optabs.cc:8197
> 0x14c4097 maybe_expand_insn(insn_code, unsigned int, expand_operand*)
>         ../../../riscv-gcc/gcc/optabs.cc:8237
> 0x14c412b expand_insn(insn_code, unsigned int, expand_operand*)
>         ../../../riscv-gcc/gcc/optabs.cc:8268
> 0x14bfc3e expand_vec_cmp_expr(tree_node*, tree_node*, rtx_def*)
>         ../../../riscv-gcc/gcc/optabs.cc:6692
> 0x1124e4a do_store_flag
>         ../../../riscv-gcc/gcc/expr.cc:13060
> 0x1116b10 expand_expr_real_2(separate_ops*, rtx_def*, machine_mode, expand_modifier)
>         ../../../riscv-gcc/gcc/expr.cc:10265
> 0x1119405 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
>         ../../../riscv-gcc/gcc/expr.cc:10810
> 0x1110fb0 expand_expr_real(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
>         ../../../riscv-gcc/gcc/expr.cc:9015
> 0xf2e973 expand_normal(tree_node*)
>         ../../../riscv-gcc/gcc/expr.h:316
> 0x12bb060 expand_vec_cond_mask_optab_fn
>         ../../../riscv-gcc/gcc/internal-fn.cc:3059
> 0x12c27ca expand_VCOND_MASK
>         ../../../riscv-gcc/gcc/internal-fn.def:184
> 0x12c52a5 expand_internal_call(internal_fn, gcall*)
>         ../../../riscv-gcc/gcc/internal-fn.cc:4792
> 0x12c52d0 expand_internal_call(gcall*)
>         ../../../riscv-gcc/gcc/internal-fn.cc:4800
> 0xf5e4c1 expand_call_stmt
>         ../../../riscv-gcc/gcc/cfgexpand.cc:2737
> 0xf62871 expand_gimple_stmt_1
>         ../../../riscv-gcc/gcc/cfgexpand.cc:3880
> 0xf62f0f expand_gimple_stmt
>         ../../../riscv-gcc/gcc/cfgexpand.cc:4044
> 0xf6b8a9 expand_gimple_basic_block
>         ../../../riscv-gcc/gcc/cfgexpand.cc:6096
>
> This ICE happens when compiling vcond.cc tests
> ________________________________
> juzhe.zhong@rivai.ai
>
>
> From: Robin Dapp
> Date: 2023-07-20 15:57
> To: juzhe.zhong@rivai.ai; gcc-patches
> CC: rdapp.gcc; kito.cheng; Kito.cheng; jeffreyalaw
> Subject: Re: [PATCH] RISC-V: Support in-order floating-point reduction
> > The UNORDERED enum will cause ICE since we have UNORDERED in rtx_code.
> >
> > Could you give me another enum name?
>
> I would have expected it to work when it's namespaced.
>
> Regards
> Robin
>
>
juzhe.zhong@rivai.ai July 20, 2023, 8:16 a.m. UTC | #8
I have tried this:
enum class reduction_type
{
  UNORDERED,
  FOLD_LEFT,
  MASK_LEN_FOLD_LEFT,
};

But it fails to build:

/gcc/build -I../../../riscv-gcc/gcc/../include  -I../../../riscv-gcc/gcc/../libcpp/include -g -O0 \
        -o build/gencondmd.o build/gencondmd.cc
In file included from ./tm_p.h:4:0,
                 from build/gencondmd.cc:29:
../../../riscv-gcc/gcc/config/riscv/riscv-protos.h:294:36: error: could not convert ‘UNORDERED’ from ‘rtx_code’ to ‘riscv_vector::reduction_type’
          reduction_type = UNORDERED);



juzhe.zhong@rivai.ai
 
From: Kito Cheng
Date: 2023-07-20 16:03
To: juzhe.zhong@rivai.ai
CC: Robin Dapp; gcc-patches; kito.cheng; jeffreyalaw
Subject: Re: Re: [PATCH] RISC-V: Support in-order floating-point reduction
Seems like because you ` using namespace riscv_vector;` so the
UNORDERED in expand_vec_cmp_float used reduction_type::UNORDERED
 
Hmmm, maybe enum class?
 
enum class reduction_type
{
  UNORDERED,
  FOLD_LEFT,
  MASK_LEN_FOLD_LEFT,
};
 
and need use like this reduction_type::UNORDERED
 
On Thu, Jul 20, 2023 at 3:59 PM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> I have no ideal, just ICE comes when running regression:
>
> during RTL pass: expand
> auto.c: In function 'test_int32_t_float_unordered_var':
> auto.c:24:3: internal compiler error: in expand_vec_cmp_float, at config/riscv/riscv-v.cc:2564
>    24 |   test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest,   \
>       |   ^~~~~
> auto.c:41:3: note: in expansion of macro 'TEST_LOOP'
>    41 |   TEST_LOOP (int32_t, float, CMP) \
>       |   ^~~~~~~~~
> auto.c:55:1: note: in expansion of macro 'TEST_CMP'
>    55 | TEST_CMP (unordered)
>       | ^~~~~~~~
> 0x1c8af0d riscv_vector::expand_vec_cmp_float(rtx_def*, rtx_code, rtx_def*, rtx_def*, bool)
>         ../../../riscv-gcc/gcc/config/riscv/riscv-v.cc:2564
> 0x233d200 gen_vec_cmprvvm1sfrvvmf32bi(rtx_def*, rtx_def*, rtx_def*, rtx_def*)
>         ../../../riscv-gcc/gcc/config/riscv/autovec.md:559
> 0x14c4582 rtx_insn* insn_gen_fn::operator()<rtx_def*, rtx_def*, rtx_def*, rtx_def*>(rtx_def*, rtx_def*, rtx_def*, rtx_def*) const
>         ../../../riscv-gcc/gcc/recog.h:407
> 0x14c3c02 maybe_gen_insn(insn_code, unsigned int, expand_operand*)
>         ../../../riscv-gcc/gcc/optabs.cc:8197
> 0x14c4097 maybe_expand_insn(insn_code, unsigned int, expand_operand*)
>         ../../../riscv-gcc/gcc/optabs.cc:8237
> 0x14c412b expand_insn(insn_code, unsigned int, expand_operand*)
>         ../../../riscv-gcc/gcc/optabs.cc:8268
> 0x14bfc3e expand_vec_cmp_expr(tree_node*, tree_node*, rtx_def*)
>         ../../../riscv-gcc/gcc/optabs.cc:6692
> 0x1124e4a do_store_flag
>         ../../../riscv-gcc/gcc/expr.cc:13060
> 0x1116b10 expand_expr_real_2(separate_ops*, rtx_def*, machine_mode, expand_modifier)
>         ../../../riscv-gcc/gcc/expr.cc:10265
> 0x1119405 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
>         ../../../riscv-gcc/gcc/expr.cc:10810
> 0x1110fb0 expand_expr_real(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
>         ../../../riscv-gcc/gcc/expr.cc:9015
> 0xf2e973 expand_normal(tree_node*)
>         ../../../riscv-gcc/gcc/expr.h:316
> 0x12bb060 expand_vec_cond_mask_optab_fn
>         ../../../riscv-gcc/gcc/internal-fn.cc:3059
> 0x12c27ca expand_VCOND_MASK
>         ../../../riscv-gcc/gcc/internal-fn.def:184
> 0x12c52a5 expand_internal_call(internal_fn, gcall*)
>         ../../../riscv-gcc/gcc/internal-fn.cc:4792
> 0x12c52d0 expand_internal_call(gcall*)
>         ../../../riscv-gcc/gcc/internal-fn.cc:4800
> 0xf5e4c1 expand_call_stmt
>         ../../../riscv-gcc/gcc/cfgexpand.cc:2737
> 0xf62871 expand_gimple_stmt_1
>         ../../../riscv-gcc/gcc/cfgexpand.cc:3880
> 0xf62f0f expand_gimple_stmt
>         ../../../riscv-gcc/gcc/cfgexpand.cc:4044
> 0xf6b8a9 expand_gimple_basic_block
>         ../../../riscv-gcc/gcc/cfgexpand.cc:6096
>
> This ICE happens when compiling vcond.cc tests
> ________________________________
> juzhe.zhong@rivai.ai
>
>
> From: Robin Dapp
> Date: 2023-07-20 15:57
> To: juzhe.zhong@rivai.ai; gcc-patches
> CC: rdapp.gcc; kito.cheng; Kito.cheng; jeffreyalaw
> Subject: Re: [PATCH] RISC-V: Support in-order floating-point reduction
> > The UNORDERED enum will cause ICE since we have UNORDERED in rtx_code.
> >
> > Could you give me another enum name?
>
> I would have expected it to work when it's namespaced.
>
> Regards
> Robin
>
>
Kito Cheng July 20, 2023, 8:24 a.m. UTC | #9
reduction_type = reduction_type::UNORDERED

On Thu, Jul 20, 2023 at 4:16 PM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> I have tried this:
> enum class reduction_type
> {
>   UNORDERED,
>   FOLD_LEFT,
>   MASK_LEN_FOLD_LEFT,
> };
>
> But fail to build.....
>
> /gcc/build -I../../../riscv-gcc/gcc/../include  -I../../../riscv-gcc/gcc/../libcpp/include -g -O0 \
>         -o build/gencondmd.o build/gencondmd.cc
> In file included from ./tm_p.h:4:0,
>                  from build/gencondmd.cc:29:
> ../../../riscv-gcc/gcc/config/riscv/riscv-protos.h:294:36: error: could not convert ‘UNORDERED’ from ‘rtx_code’ to ‘riscv_vector::reduction_type’
>           reduction_type = UNORDERED);
>
> ________________________________
> juzhe.zhong@rivai.ai
>
>
> From: Kito Cheng
> Date: 2023-07-20 16:03
> To: juzhe.zhong@rivai.ai
> CC: Robin Dapp; gcc-patches; kito.cheng; jeffreyalaw
> Subject: Re: Re: [PATCH] RISC-V: Support in-order floating-point reduction
> Seems like because you ` using namespace riscv_vector;` so the
> UNORDERED in expand_vec_cmp_float used reduction_type::UNORDERED
>
> Hmmm, maybe enum class?
>
> enum class reduction_type
> {
>   UNORDERED,
>   FOLD_LEFT,
>   MASK_LEN_FOLD_LEFT,
> };
>
> and need use like this reduction_type::UNORDERED
>
> On Thu, Jul 20, 2023 at 3:59 PM juzhe.zhong@rivai.ai
> <juzhe.zhong@rivai.ai> wrote:
> >
> > I have no ideal, just ICE comes when running regression:
> >
> > during RTL pass: expand
> > auto.c: In function 'test_int32_t_float_unordered_var':
> > auto.c:24:3: internal compiler error: in expand_vec_cmp_float, at config/riscv/riscv-v.cc:2564
> >    24 |   test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest,   \
> >       |   ^~~~~
> > auto.c:41:3: note: in expansion of macro 'TEST_LOOP'
> >    41 |   TEST_LOOP (int32_t, float, CMP) \
> >       |   ^~~~~~~~~
> > auto.c:55:1: note: in expansion of macro 'TEST_CMP'
> >    55 | TEST_CMP (unordered)
> >       | ^~~~~~~~
> > 0x1c8af0d riscv_vector::expand_vec_cmp_float(rtx_def*, rtx_code, rtx_def*, rtx_def*, bool)
> >         ../../../riscv-gcc/gcc/config/riscv/riscv-v.cc:2564
> > 0x233d200 gen_vec_cmprvvm1sfrvvmf32bi(rtx_def*, rtx_def*, rtx_def*, rtx_def*)
> >         ../../../riscv-gcc/gcc/config/riscv/autovec.md:559
> > 0x14c4582 rtx_insn* insn_gen_fn::operator()<rtx_def*, rtx_def*, rtx_def*, rtx_def*>(rtx_def*, rtx_def*, rtx_def*, rtx_def*) const
> >         ../../../riscv-gcc/gcc/recog.h:407
> > 0x14c3c02 maybe_gen_insn(insn_code, unsigned int, expand_operand*)
> >         ../../../riscv-gcc/gcc/optabs.cc:8197
> > 0x14c4097 maybe_expand_insn(insn_code, unsigned int, expand_operand*)
> >         ../../../riscv-gcc/gcc/optabs.cc:8237
> > 0x14c412b expand_insn(insn_code, unsigned int, expand_operand*)
> >         ../../../riscv-gcc/gcc/optabs.cc:8268
> > 0x14bfc3e expand_vec_cmp_expr(tree_node*, tree_node*, rtx_def*)
> >         ../../../riscv-gcc/gcc/optabs.cc:6692
> > 0x1124e4a do_store_flag
> >         ../../../riscv-gcc/gcc/expr.cc:13060
> > 0x1116b10 expand_expr_real_2(separate_ops*, rtx_def*, machine_mode, expand_modifier)
> >         ../../../riscv-gcc/gcc/expr.cc:10265
> > 0x1119405 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
> >         ../../../riscv-gcc/gcc/expr.cc:10810
> > 0x1110fb0 expand_expr_real(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
> >         ../../../riscv-gcc/gcc/expr.cc:9015
> > 0xf2e973 expand_normal(tree_node*)
> >         ../../../riscv-gcc/gcc/expr.h:316
> > 0x12bb060 expand_vec_cond_mask_optab_fn
> >         ../../../riscv-gcc/gcc/internal-fn.cc:3059
> > 0x12c27ca expand_VCOND_MASK
> >         ../../../riscv-gcc/gcc/internal-fn.def:184
> > 0x12c52a5 expand_internal_call(internal_fn, gcall*)
> >         ../../../riscv-gcc/gcc/internal-fn.cc:4792
> > 0x12c52d0 expand_internal_call(gcall*)
> >         ../../../riscv-gcc/gcc/internal-fn.cc:4800
> > 0xf5e4c1 expand_call_stmt
> >         ../../../riscv-gcc/gcc/cfgexpand.cc:2737
> > 0xf62871 expand_gimple_stmt_1
> >         ../../../riscv-gcc/gcc/cfgexpand.cc:3880
> > 0xf62f0f expand_gimple_stmt
> >         ../../../riscv-gcc/gcc/cfgexpand.cc:4044
> > 0xf6b8a9 expand_gimple_basic_block
> >         ../../../riscv-gcc/gcc/cfgexpand.cc:6096
> >
> > This ICE happens when compiling vcond.cc tests
> > ________________________________
> > juzhe.zhong@rivai.ai
> >
> >
> > From: Robin Dapp
> > Date: 2023-07-20 15:57
> > To: juzhe.zhong@rivai.ai; gcc-patches
> > CC: rdapp.gcc; kito.cheng; Kito.cheng; jeffreyalaw
> > Subject: Re: [PATCH] RISC-V: Support in-order floating-point reduction
> > > The UNORDERED enum will cause ICE since we have UNORDERED in rtx_code.
> > >
> > > Could you give me another enum name?
> >
> > I would have expected it to work when it's namespaced.
> >
> > Regards
> > Robin
> >
> >
>
juzhe.zhong@rivai.ai July 20, 2023, 8:53 a.m. UTC | #10
All comments are addressed in the V2 patch:
https://gcc.gnu.org/pipermail/gcc-patches/2023-July/625038.html 

The redundant vsetvli instructions are elided by this code:
  rtx len = type == reduction_type::MASK_LEN_FOLD_LEFT ? ops[4] : NULL_RTX;
  emit_scalar_move_insn (code_for_pred_broadcast (m1_mode), scalar_move_ops,
                         len);

It passes the len operand through for MASK_LEN_FOLD_LEFT.

Now the codegen:

foo:
lui a5,%hi(.LC0)
flw fa0,%lo(.LC0)(a5)
ble a1,zero,.L4
.L3:
vsetvli a5,a1,e32,m1,ta,ma
slli a4,a5,2
sub a1,a1,a5
vle32.v v1,0(a0)
vfmv.s.f v2,fa0
add a0,a0,a4
vfredosum.vs v1,v1,v2
vfmv.f.s fa0,v1
bne a1,zero,.L3
ret


juzhe.zhong@rivai.ai
 
From: Kito Cheng
Date: 2023-07-20 16:24
To: juzhe.zhong@rivai.ai
CC: Robin Dapp; gcc-patches; kito.cheng; jeffreyalaw
Subject: Re: Re: [PATCH] RISC-V: Support in-order floating-point reduction
reduction_type = reduction_type::UNORDERED
 
On Thu, Jul 20, 2023 at 4:16 PM juzhe.zhong@rivai.ai
<juzhe.zhong@rivai.ai> wrote:
>
> I have tried this:
> enum class reduction_type
> {
>   UNORDERED,
>   FOLD_LEFT,
>   MASK_LEN_FOLD_LEFT,
> };
>
> But fail to build.....
>
> /gcc/build -I../../../riscv-gcc/gcc/../include  -I../../../riscv-gcc/gcc/../libcpp/include -g -O0 \
>         -o build/gencondmd.o build/gencondmd.cc
> In file included from ./tm_p.h:4:0,
>                  from build/gencondmd.cc:29:
> ../../../riscv-gcc/gcc/config/riscv/riscv-protos.h:294:36: error: could not convert ‘UNORDERED’ from ‘rtx_code’ to ‘riscv_vector::reduction_type’
>           reduction_type = UNORDERED);
>
> ________________________________
> juzhe.zhong@rivai.ai
>
>
> From: Kito Cheng
> Date: 2023-07-20 16:03
> To: juzhe.zhong@rivai.ai
> CC: Robin Dapp; gcc-patches; kito.cheng; jeffreyalaw
> Subject: Re: Re: [PATCH] RISC-V: Support in-order floating-point reduction
> Seems like because you ` using namespace riscv_vector;` so the
> UNORDERED in expand_vec_cmp_float used reduction_type::UNORDERED
>
> Hmmm, maybe enum class?
>
> enum class reduction_type
> {
>   UNORDERED,
>   FOLD_LEFT,
>   MASK_LEN_FOLD_LEFT,
> };
>
> and need use like this reduction_type::UNORDERED
>
> On Thu, Jul 20, 2023 at 3:59 PM juzhe.zhong@rivai.ai
> <juzhe.zhong@rivai.ai> wrote:
> >
> > I have no ideal, just ICE comes when running regression:
> >
> > during RTL pass: expand
> > auto.c: In function 'test_int32_t_float_unordered_var':
> > auto.c:24:3: internal compiler error: in expand_vec_cmp_float, at config/riscv/riscv-v.cc:2564
> >    24 |   test_##TYPE1##_##TYPE2##_##CMP##_var (TYPE1 *restrict dest,   \
> >       |   ^~~~~
> > auto.c:41:3: note: in expansion of macro 'TEST_LOOP'
> >    41 |   TEST_LOOP (int32_t, float, CMP) \
> >       |   ^~~~~~~~~
> > auto.c:55:1: note: in expansion of macro 'TEST_CMP'
> >    55 | TEST_CMP (unordered)
> >       | ^~~~~~~~
> > 0x1c8af0d riscv_vector::expand_vec_cmp_float(rtx_def*, rtx_code, rtx_def*, rtx_def*, bool)
> >         ../../../riscv-gcc/gcc/config/riscv/riscv-v.cc:2564
> > 0x233d200 gen_vec_cmprvvm1sfrvvmf32bi(rtx_def*, rtx_def*, rtx_def*, rtx_def*)
> >         ../../../riscv-gcc/gcc/config/riscv/autovec.md:559
> > 0x14c4582 rtx_insn* insn_gen_fn::operator()<rtx_def*, rtx_def*, rtx_def*, rtx_def*>(rtx_def*, rtx_def*, rtx_def*, rtx_def*) const
> >         ../../../riscv-gcc/gcc/recog.h:407
> > 0x14c3c02 maybe_gen_insn(insn_code, unsigned int, expand_operand*)
> >         ../../../riscv-gcc/gcc/optabs.cc:8197
> > 0x14c4097 maybe_expand_insn(insn_code, unsigned int, expand_operand*)
> >         ../../../riscv-gcc/gcc/optabs.cc:8237
> > 0x14c412b expand_insn(insn_code, unsigned int, expand_operand*)
> >         ../../../riscv-gcc/gcc/optabs.cc:8268
> > 0x14bfc3e expand_vec_cmp_expr(tree_node*, tree_node*, rtx_def*)
> >         ../../../riscv-gcc/gcc/optabs.cc:6692
> > 0x1124e4a do_store_flag
> >         ../../../riscv-gcc/gcc/expr.cc:13060
> > 0x1116b10 expand_expr_real_2(separate_ops*, rtx_def*, machine_mode, expand_modifier)
> >         ../../../riscv-gcc/gcc/expr.cc:10265
> > 0x1119405 expand_expr_real_1(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
> >         ../../../riscv-gcc/gcc/expr.cc:10810
> > 0x1110fb0 expand_expr_real(tree_node*, rtx_def*, machine_mode, expand_modifier, rtx_def**, bool)
> >         ../../../riscv-gcc/gcc/expr.cc:9015
> > 0xf2e973 expand_normal(tree_node*)
> >         ../../../riscv-gcc/gcc/expr.h:316
> > 0x12bb060 expand_vec_cond_mask_optab_fn
> >         ../../../riscv-gcc/gcc/internal-fn.cc:3059
> > 0x12c27ca expand_VCOND_MASK
> >         ../../../riscv-gcc/gcc/internal-fn.def:184
> > 0x12c52a5 expand_internal_call(internal_fn, gcall*)
> >         ../../../riscv-gcc/gcc/internal-fn.cc:4792
> > 0x12c52d0 expand_internal_call(gcall*)
> >         ../../../riscv-gcc/gcc/internal-fn.cc:4800
> > 0xf5e4c1 expand_call_stmt
> >         ../../../riscv-gcc/gcc/cfgexpand.cc:2737
> > 0xf62871 expand_gimple_stmt_1
> >         ../../../riscv-gcc/gcc/cfgexpand.cc:3880
> > 0xf62f0f expand_gimple_stmt
> >         ../../../riscv-gcc/gcc/cfgexpand.cc:4044
> > 0xf6b8a9 expand_gimple_basic_block
> >         ../../../riscv-gcc/gcc/cfgexpand.cc:6096
> >
> > This ICE happens when compiling vcond.cc tests
> > ________________________________
> > juzhe.zhong@rivai.ai
> >
> >
> > From: Robin Dapp
> > Date: 2023-07-20 15:57
> > To: juzhe.zhong@rivai.ai; gcc-patches
> > CC: rdapp.gcc; kito.cheng; Kito.cheng; jeffreyalaw
> > Subject: Re: [PATCH] RISC-V: Support in-order floating-point reduction
> > > The UNORDERED enum will cause ICE since we have UNORDERED in rtx_code.
> > >
> > > Could you give me another enum name?
> >
> > I would have expected it to work when it's namespaced.
> >
> > Regards
> > Robin
> >
> >
>
diff mbox series

Patch

diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 00947207f3f..af55ef7b68f 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -1687,3 +1687,42 @@ 
   riscv_vector::expand_reduction (SMIN, operands, f);
   DONE;
 })
+
+;; -------------------------------------------------------------------------
+;; ---- [FP] Left-to-right reductions
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - vfredosum.vs
+;; -------------------------------------------------------------------------
+
+;; Unpredicated in-order FP reductions.
+(define_expand "fold_left_plus_<mode>"
+  [(match_operand:<VEL> 0 "register_operand")
+   (match_operand:<VEL> 1 "register_operand")
+   (match_operand:VF 2 "register_operand")]
+  "TARGET_VECTOR"
+{
+  riscv_vector::expand_reduction (PLUS, operands,
+				  operands[1],
+				  riscv_vector::FOLD_LEFT_REDUDUCTION);
+  DONE;
+})
+
+;; Predicated in-order FP reductions.
+(define_expand "mask_len_fold_left_plus_<mode>"
+  [(match_operand:<VEL> 0 "register_operand")
+   (match_operand:<VEL> 1 "register_operand")
+   (match_operand:VF 2 "register_operand")
+   (match_operand:<VM> 3 "vector_mask_operand")
+   (match_operand 4 "autovec_length_operand")
+   (match_operand 5 "const_0_operand")]
+  "TARGET_VECTOR"
+{
+  if (rtx_equal_p (operands[4], const0_rtx))
+    emit_move_insn (operands[0], operands[1]);
+  else
+    riscv_vector::expand_reduction (PLUS, operands,
+				    operands[1],
+				    riscv_vector::MASK_LEN_FOLD_LEFT_REDUDUCTION);
+  DONE;
+})
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 16fb8dabca0..aa313b56a32 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -199,6 +199,7 @@  enum insn_type
   RVV_GATHER_M_OP = 5,
   RVV_SCATTER_M_OP = 4,
   RVV_REDUCTION_OP = 3,
+  RVV_REDUCTION_TU_OP = RVV_REDUCTION_OP + 2,
 };
 enum vlmul_type
 {
@@ -270,6 +271,13 @@  enum mask_policy
   MASK_AGNOSTIC = 1,
   MASK_ANY = 2,
 };
+
+enum reduction_type
+{
+  UNORDERED_REDUDUCTION,
+  FOLD_LEFT_REDUDUCTION,
+  MASK_LEN_FOLD_LEFT_REDUDUCTION,
+};
 enum tail_policy get_prefer_tail_policy ();
 enum mask_policy get_prefer_mask_policy ();
 rtx get_avl_type_rtx (enum avl_type);
@@ -282,7 +290,8 @@  bool has_vi_variant_p (rtx_code, rtx);
 void expand_vec_cmp (rtx, rtx_code, rtx, rtx);
 bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
 void expand_cond_len_binop (rtx_code, rtx *);
-void expand_reduction (rtx_code, rtx *, rtx);
+void expand_reduction (rtx_code, rtx *, rtx,
+		       reduction_type = UNORDERED_REDUDUCTION);
 #endif
 bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
 			  bool, void (*)(rtx *, rtx));
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 53088edf909..16321f1b116 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -1196,6 +1196,26 @@  emit_vlmax_fp_reduction_insn (unsigned icode, int op_num, rtx *ops)
   e.emit_insn ((enum insn_code) icode, ops);
 }
 
+/* Emit masked FP reduction ICODE with OP_NUM operands OPS and length VL.  */
+static void
+emit_nonvlmax_fp_reduction_insn (unsigned icode, int op_num, rtx *ops, rtx vl)
+{
+  machine_mode dest_mode = GET_MODE (ops[0]);
+  machine_mode mask_mode = get_mask_mode (GET_MODE (ops[1])).require ();
+  insn_expander<RVV_INSN_OPERANDS_MAX> e (op_num,
+					  /* HAS_DEST_P */ true,
+					  /* FULLY_UNMASKED_P */ false,
+					  /* USE_REAL_MERGE_P */ true,
+					  /* HAS_AVL_P */ true,
+					  /* VLMAX_P */ false, dest_mode,
+					  mask_mode);
+
+  e.set_policy (TAIL_ANY);
+  e.set_rounding_mode (FRM_DYN);
+  e.set_vl (vl);
+  e.emit_insn ((enum insn_code) icode, ops);
+}
+
 /* Emit merge instruction.  */
 
 static machine_mode
@@ -3343,9 +3363,10 @@  expand_cond_len_ternop (unsigned icode, rtx *ops)
 
 /* Expand reduction operations.  */
 void
-expand_reduction (rtx_code code, rtx *ops, rtx init)
+expand_reduction (rtx_code code, rtx *ops, rtx init, reduction_type type)
 {
-  machine_mode vmode = GET_MODE (ops[1]);
+  rtx vector = type == UNORDERED_REDUDUCTION ? ops[1] : ops[2];
+  machine_mode vmode = GET_MODE (vector);
   machine_mode m1_mode = get_m1_mode (vmode).require ();
   machine_mode m1_mmode = get_mask_mode (m1_mode).require ();
 
@@ -3356,13 +3377,25 @@  expand_reduction (rtx_code code, rtx *ops, rtx init)
   emit_scalar_move_insn (code_for_pred_broadcast (m1_mode), scalar_move_ops);
 
   rtx m1_tmp2 = gen_reg_rtx (m1_mode);
-  rtx reduc_ops[] = {m1_tmp2, ops[1], m1_tmp};
+  rtx reduc_ops[] = {m1_tmp2, vector, m1_tmp};
 
   if (FLOAT_MODE_P (vmode) && code == PLUS)
     {
-      insn_code icode
-	= code_for_pred_reduc_plus (UNSPEC_UNORDERED, vmode, m1_mode);
-      emit_vlmax_fp_reduction_insn (icode, RVV_REDUCTION_OP, reduc_ops);
+      insn_code icode = code_for_pred_reduc_plus (type == UNORDERED_REDUDUCTION
+						    ? UNSPEC_UNORDERED
+						    : UNSPEC_ORDERED,
+						  vmode, m1_mode);
+      if (type == MASK_LEN_FOLD_LEFT_REDUDUCTION)
+	{
+	  rtx mask = ops[3];
+	  rtx len = ops[4];
+	  rtx mask_len_reduc_ops[]
+	    = {m1_tmp2, mask, RVV_VUNDEF (m1_mode), vector, m1_tmp};
+	  emit_nonvlmax_fp_reduction_insn (icode, RVV_REDUCTION_TU_OP,
+					   mask_len_reduc_ops, len);
+	}
+      else
+	emit_vlmax_fp_reduction_insn (icode, RVV_REDUCTION_OP, reduc_ops);
     }
   else
     {
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-1.c
new file mode 100644
index 00000000000..c293e9ae746
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-1.c
@@ -0,0 +1,28 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include <stdint-gcc.h>
+
+#define NUM_ELEMS(TYPE) ((int)(5 * (256 / sizeof (TYPE)) + 3))
+
+#define DEF_REDUC_PLUS(TYPE)			\
+  TYPE __attribute__ ((noinline, noclone))	\
+  reduc_plus_##TYPE (TYPE *a, TYPE *b)		\
+  {						\
+    TYPE r = 0, q = 3;				\
+    for (int i = 0; i < NUM_ELEMS (TYPE); i++)	\
+      {						\
+	r += a[i];				\
+	q -= b[i];				\
+      }						\
+    return r * q;				\
+  }
+
+#define TEST_ALL(T) \
+  T (_Float16) \
+  T (float) \
+  T (double)
+
+TEST_ALL (DEF_REDUC_PLUS)
+
+/* { dg-final { scan-assembler-times {vfredosum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 6 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-2.c
new file mode 100644
index 00000000000..2e1e7ab674d
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-2.c
@@ -0,0 +1,26 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#define NUM_ELEMS(TYPE) ((int) (5 * (256 / sizeof (TYPE)) + 3))
+
+#define DEF_REDUC_PLUS(TYPE)					\
+void __attribute__ ((noinline, noclone))			\
+reduc_plus_##TYPE (TYPE (*restrict a)[NUM_ELEMS (TYPE)],	\
+		   TYPE *restrict r, int n)			\
+{								\
+  for (int i = 0; i < n; i++)					\
+    {								\
+      r[i] = 0;							\
+      for (int j = 0; j < NUM_ELEMS (TYPE); j++)		\
+        r[i] += a[i][j];					\
+    }								\
+}
+
+#define TEST_ALL(T) \
+  T (_Float16) \
+  T (float) \
+  T (double)
+
+TEST_ALL (DEF_REDUC_PLUS)
+
+/* { dg-final { scan-assembler-times {vfredosum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 3 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-3.c
new file mode 100644
index 00000000000..f559d40e60f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-3.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+double mat[100][2];
+
+double
+slp_reduc_plus (int n)
+{
+  double tmp = 0.0;
+  for (int i = 0; i < n; i++)
+    {
+      tmp = tmp + mat[i][0];
+      tmp = tmp + mat[i][1];
+    }
+  return tmp;
+}
+
+/* { dg-final { scan-assembler-times {vfredosum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 1 } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-4.c
new file mode 100644
index 00000000000..428d371d9cf
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-4.c
@@ -0,0 +1,24 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+double mat[100][8];
+
+double
+slp_reduc_plus (int n)
+{
+  double tmp = 0.0;
+  for (int i = 0; i < n; i++)
+    {
+      tmp = tmp + mat[i][0];
+      tmp = tmp + mat[i][1];
+      tmp = tmp + mat[i][2];
+      tmp = tmp + mat[i][3];
+      tmp = tmp + mat[i][4];
+      tmp = tmp + mat[i][5];
+      tmp = tmp + mat[i][6];
+      tmp = tmp + mat[i][7];
+    }
+  return tmp;
+}
+
+/* { dg-final { scan-assembler {vfredosum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-5.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-5.c
new file mode 100644
index 00000000000..24add2291f1
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-5.c
@@ -0,0 +1,28 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+double mat[100][12];
+
+double
+slp_reduc_plus (int n)
+{
+  double tmp = 0.0;
+  for (int i = 0; i < n; i++)
+    {
+      tmp = tmp + mat[i][0];
+      tmp = tmp + mat[i][1];
+      tmp = tmp + mat[i][2];
+      tmp = tmp + mat[i][3];
+      tmp = tmp + mat[i][4];
+      tmp = tmp + mat[i][5];
+      tmp = tmp + mat[i][6];
+      tmp = tmp + mat[i][7];
+      tmp = tmp + mat[i][8];
+      tmp = tmp + mat[i][9];
+      tmp = tmp + mat[i][10];
+      tmp = tmp + mat[i][11];
+    }
+  return tmp;
+}
+
+/* { dg-final { scan-assembler {vfredosum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-6.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-6.c
new file mode 100644
index 00000000000..c1567b067ba
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-6.c
@@ -0,0 +1,18 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model -fdump-tree-vect-details" } */
+
+float
+double_reduc (float (*i)[16])
+{
+  float l = 0;
+
+#pragma GCC unroll 0
+  for (int a = 0; a < 8; a++)
+    for (int b = 0; b < 100; b++)
+      l += i[b][a];
+  return l;
+}
+
+/* { dg-final { scan-assembler-times {vfredosum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 1 } } */
+/* { dg-final { scan-tree-dump "Detected double reduction" "vect" } } */
+/* { dg-final { scan-tree-dump-not "OUTER LOOP VECTORIZED" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-7.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-7.c
new file mode 100644
index 00000000000..f742a824bb2
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict-7.c
@@ -0,0 +1,21 @@ 
+/* { dg-do compile } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d --param=riscv-autovec-preference=scalable -fno-vect-cost-model -fdump-tree-vect-details" } */
+
+float
+double_reduc (float *i, float *j)
+{
+  float k = 0, l = 0;
+
+  for (int a = 0; a < 8; a++)
+    for (int b = 0; b < 100; b++)
+      {
+        k += i[b];
+        l += j[b];
+      }
+  return l * k;
+}
+
+/* { dg-final { scan-assembler-times {vle32\.v} 2 } } */
+/* { dg-final { scan-assembler-times {vfredosum\.vs\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 2 } } */
+/* { dg-final { scan-tree-dump "Detected double reduction" "vect" } } */
+/* { dg-final { scan-tree-dump-not "OUTER LOOP VECTORIZED" "vect" } } */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-1.c
new file mode 100644
index 00000000000..516be97e9eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-1.c
@@ -0,0 +1,29 @@ 
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "reduc_strict-1.c"
+
+#define TEST_REDUC_PLUS(TYPE)			\
+  {						\
+    TYPE a[NUM_ELEMS (TYPE)];			\
+    TYPE b[NUM_ELEMS (TYPE)];			\
+    TYPE r = 0, q = 3;				\
+    for (int i = 0; i < NUM_ELEMS (TYPE); i++)	\
+      {						\
+	a[i] = (i * 0.1) * (i & 1 ? 1 : -1);	\
+	b[i] = (i * 0.3) * (i & 1 ? 1 : -1);	\
+	r += a[i];				\
+	q -= b[i];				\
+	asm volatile ("" ::: "memory");		\
+      }						\
+    TYPE res = reduc_plus_##TYPE (a, b);	\
+    if (res != r * q)				\
+      __builtin_abort ();			\
+  }
+
+int __attribute__ ((optimize (1)))
+main ()
+{
+  TEST_ALL (TEST_REDUC_PLUS);
+  return 0;
+}
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-2.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-2.c
new file mode 100644
index 00000000000..0a4238d96f3
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_strict_run-2.c
@@ -0,0 +1,31 @@ 
+/* { dg-do run { target { riscv_vector } } } */
+/* { dg-additional-options "--param=riscv-autovec-preference=scalable -fno-vect-cost-model" } */
+
+#include "reduc_strict-2.c"
+
+#define NROWS 5
+
+#define TEST_REDUC_PLUS(TYPE)					\
+  {								\
+    TYPE a[NROWS][NUM_ELEMS (TYPE)];				\
+    TYPE r[NROWS];						\
+    TYPE expected[NROWS] = {};					\
+    for (int i = 0; i < NROWS; ++i)				\
+      for (int j = 0; j < NUM_ELEMS (TYPE); ++j)		\
+	{							\
+	  a[i][j] = (i * 0.1 + j * 0.6) * (j & 1 ? 1 : -1);	\
+	  expected[i] += a[i][j];				\
+	  asm volatile ("" ::: "memory");			\
+	}							\
+    reduc_plus_##TYPE (a, r, NROWS);				\
+    for (int i = 0; i < NROWS; ++i)				\
+      if (r[i] != expected[i])					\
+	__builtin_abort ();					\
+  }
+
+int __attribute__ ((optimize (1)))
+main ()
+{
+  TEST_ALL (TEST_REDUC_PLUS);
+  return 0;
+}