Message ID | 20230728115004.3071397-1-yanzhang.wang@intel.com |
---|---|
State | New |
Headers | show |
Series | [v2] RISC-V: convert the mulh with 0 to mov 0 to the reg. | expand |
<yanzhang.wang@intel.com> 於 2023年7月28日 週五 19:50 寫道: > From: Yanzhang Wang <yanzhang.wang@intel.com> > > This patch will optimize the below mulh example, > > vint32m1_t shortcut_for_riscv_vmulh_case_0(vint32m1_t v1, size_t vl) { > return __riscv_vmulh_vx_i32m1(v1, 0, vl); > } > > from mulh pattern > > vsetvli zero, a2, e32, m1, ta, ma > vmulh.vx v24, v24, zero > vs1r.v v24, 0(a0) > > to below vmv. > > vsetvli zero,a2,e32,m1,ta,ma > vmv.v.i v1,0 > vs1r.v v1,0(a0) > > It will elimate the mul with const 0 instruction to the simple mov > instruction. > > Signed-off-by: Yanzhang Wang <yanzhang.wang@intel.com> > > gcc/ChangeLog: > > * config/riscv/autovec-opt.md: Add a split pattern. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/base/binop_vx_constraint-121.c: The mul > with 0 will be simplified to vmv.v.i. > * gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc: New test. > --- > gcc/config/riscv/autovec-opt.md | 58 +++++++++++++++++++ > gcc/config/riscv/riscv-protos.h | 2 + > gcc/config/riscv/riscv-v.cc | 57 ++++++++++++++++++ > .../riscv/rvv/autovec/vmulh-with-zero.cc | 19 ++++++ > .../riscv/rvv/base/binop_vx_constraint-121.c | 3 +- > 5 files changed, 138 insertions(+), 1 deletion(-) > create mode 100644 > gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc > > diff --git a/gcc/config/riscv/autovec-opt.md > b/gcc/config/riscv/autovec-opt.md > index 28040805b23..0d87572d1a4 100644 > --- a/gcc/config/riscv/autovec-opt.md > +++ b/gcc/config/riscv/autovec-opt.md > @@ -405,3 +405,61 @@ > "vmv.x.s\t%0,%1" > [(set_attr "type" "vimovvx") > (set_attr "mode" "<MODE>")]) > + > +;;; Simplify the mulh with 0 to move > +(define_split > + [(set (match_operand:VI_QHS 0 "register_operand") > + (if_then_else:VI_QHS > + (unspec:<VM> > + [(match_operand:<VM> 1 "vector_all_trues_mask_operand") > + (match_operand 5 "vector_length_operand") > + (match_operand 6 "const_int_operand") > + (match_operand 7 "const_int_operand") > + (match_operand 8 "const_int_operand") > + (reg:SI VL_REGNUM) > + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) > + (unspec:VI_QHS > + [(vec_duplicate:VI_QHS > + (match_operand:<VEL> 4 "reg_or_0_operand")) > This could be just a const int zero rather than a match operand + (match_operand:VI_QHS 3 "register_operand")] VMULH) > + (match_operand:VI_QHS 2 "vector_merge_operand") > + ))] > + "TARGET_VECTOR > + && rtx_equal_p (operands[4], CONST0_RTX (GET_MODE (operands[4])))" > Then no need to check here. + [(const_int 0)] > +{ > + riscv_vector::simplify_unspec_operations (operands, UNSPEC, > + <VMULH>, <MODE>mode) ; > + DONE; > +}) > + > +;;; Simplify vmadc + vadc with 0 to a simple move. > +(define_split > + [(set (match_operand:VI 0 "register_operand") > + (if_then_else:VI > + (unspec:<VM> > + [(match_operand 4 "vector_length_operand") > + (match_operand 5 "const_int_operand") > + (match_operand 6 "const_int_operand") > + (reg:SI VL_REGNUM) > + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) > + (unspec:VI > + [(match_operand:VI 2 "register_operand") > + (unspec:<VM> > + [(match_operand:VI 3 "register_operand") > + (unspec:<VM> > + [(match_operand 7 "vector_length_operand") > + (match_operand 8 "const_int_operand") > + (reg:SI VL_REGNUM) > + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) > + ] UNSPEC_OVERFLOW) > + ] UNSPEC_VADC) > + (match_operand:VI 1 "vector_merge_operand")))] > + "TARGET_VECTOR" > + [(const_int 0)] > +{ > + riscv_vector::simplify_unspec_operations (operands, PLUS, UNSPEC_VADC, > + <MODE>mode); > + DONE; > +}) > + > diff --git a/gcc/config/riscv/riscv-protos.h > b/gcc/config/riscv/riscv-protos.h > index f052757cede..6a188a3d0ef 100644 > --- a/gcc/config/riscv/riscv-protos.h > +++ b/gcc/config/riscv/riscv-protos.h > @@ -228,6 +228,8 @@ bool neg_simm5_p (rtx); > bool has_vi_variant_p (rtx_code, rtx); > void expand_vec_cmp (rtx, rtx_code, rtx, rtx); > bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool); > +void simplify_complement (rtx *, rtx_code, machine_mode); > +void simplify_unspec_operations (rtx*, rtx_code, int, machine_mode); > #endif > bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode, > bool, void (*)(rtx *, rtx)); > diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc > index 839a2c6ba71..9a9428ce18d 100644 > --- a/gcc/config/riscv/riscv-v.cc > +++ b/gcc/config/riscv/riscv-v.cc > @@ -2721,4 +2721,61 @@ expand_select_vl (rtx *ops) > emit_insn (gen_no_side_effects_vsetvl_rtx (rvv_mode, ops[0], ops[1])); > } > > +void simplify_mulh (rtx *operands, > + machine_mode mode) > +{ > + rtx zero_operand = CONST0_RTX(GET_MODE(operands[4])); > + if (rtx_equal_p(operands[4], zero_operand)) > + { > + machine_mode mask_mode = riscv_vector::get_mask_mode (mode).require > (); > + emit_insn (gen_pred_mov (mode, operands[0], CONST1_RTX (mask_mode), > + RVV_VUNDEF (mode), > + CONST0_RTX (GET_MODE (operands[0])), > + operands[5], operands[6], operands[7], > + operands[8])); > + } > +} > + > +void simplify_vadc (rtx *operands, > + machine_mode mode) > +{ > + machine_mode mask_mode = riscv_vector::get_mask_mode (mode).require (); > + > + if (rtx_equal_p(operands[2], operands[3])) > + { > + emit_insn (gen_pred_mov (mode, operands[0], CONST1_RTX (mask_mode), > + operands[1], operands[2], operands[4], > + operands[5], operands[6], > + get_avl_type_rtx (riscv_vector::VLMAX))); > + } > +} > + > +void simplify_unspec_operations (rtx *operands, > + rtx_code code, > + int unspec, > + machine_mode mode) > +{ > + switch (unspec) > + { > + case UNSPEC_VMULHS: > + case UNSPEC_VMULHU: > + case UNSPEC_VMULHSU: > + simplify_mulh (operands, mode); > + break; > + > + case UNSPEC_VADC: > + case UNSPEC_VSBC: > + simplify_vadc(operands, mode); > + break; > + > + default: > + break; > + } > +} > + > +void simplify_complement (rtx *operands, > + rtx_code code, > + machine_mode mode) > +{ > +} > } // namespace riscv_vector > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc > new file mode 100644 > index 00000000000..6e4a3d62bc0 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc > @@ -0,0 +1,19 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */ > + > +#include "riscv_vector.h" > + > +#define VMULH_WITH_LMUL(X) \ > + vint32m##X##_t shortcut_for_riscv_vmulh_case_##X (vint32m##X##_t v1,\ > + size_t vl) { \ > + return __riscv_vmulh_vx_i32m ##X (v1, 0, vl); > \ > + } > + > + > +VMULH_WITH_LMUL (1) > +VMULH_WITH_LMUL (2) > +VMULH_WITH_LMUL (4) > +VMULH_WITH_LMUL (8) > +VMULH_WITH_LMUL (f2) > + > +/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 5} */ > diff --git > a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c > b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c > index 4d2de91bc14..d1473274137 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c > @@ -50,6 +50,7 @@ void f6 (void * in, void *out, int32_t x) > __riscv_vse64_v_i64m1 (out, v3, 4); > } > > -/* { dg-final { scan-assembler-times > {vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } } */ > +/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 1 } } */ > +/* { dg-final { scan-assembler-times > {vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 1 } } */ > /* { dg-final { scan-assembler-times {vdiv\.vx\s+v[0-9]+,\s*v[0-9]+,zero} > 2 } } */ > /* { dg-final { scan-assembler-times {vrem\.vx\s+v[0-9]+,\s*v[0-9]+,zero} > 2 } } */ > -- > 2.41.0 > >
This is a draft patch. I would like to explain it's hard to make the simplify generic and ask for some help. There're 2 categories we need to optimize. - The op in optab such as div / 1. - The unspec operation such as mulh * 0, (vadc+vmadc) + 0. Especially for the unspec operation, I found we need to write one by one to match the special pattern. Seems there's no way to write a generic pattern that will match mulh, (vadc+vmadc), sll... This way is too complicated and not so elegant because need to write so much md patterns. Do you have any ideas? > -----Original Message----- > From: Wang, Yanzhang <yanzhang.wang@intel.com> > Sent: Friday, July 28, 2023 7:50 PM > To: gcc-patches@gcc.gnu.org > Cc: juzhe.zhong@rivai.ai; kito.cheng@sifive.com; rdapp.gcc@gmail.com; Li, > Pan2 <pan2.li@intel.com>; Wang, Yanzhang <yanzhang.wang@intel.com> > Subject: [PATCH v2] RISC-V: convert the mulh with 0 to mov 0 to the reg. > > From: Yanzhang Wang <yanzhang.wang@intel.com> > > This patch will optimize the below mulh example, > > vint32m1_t shortcut_for_riscv_vmulh_case_0(vint32m1_t v1, size_t vl) { > return __riscv_vmulh_vx_i32m1(v1, 0, vl); } > > from mulh pattern > > vsetvli zero, a2, e32, m1, ta, ma > vmulh.vx v24, v24, zero > vs1r.v v24, 0(a0) > > to below vmv. > > vsetvli zero,a2,e32,m1,ta,ma > vmv.v.i v1,0 > vs1r.v v1,0(a0) > > It will elimate the mul with const 0 instruction to the simple mov > instruction. > > Signed-off-by: Yanzhang Wang <yanzhang.wang@intel.com> > > gcc/ChangeLog: > > * config/riscv/autovec-opt.md: Add a split pattern. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/base/binop_vx_constraint-121.c: The mul > with 0 will be simplified to vmv.v.i. > * gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc: New test. > --- > gcc/config/riscv/autovec-opt.md | 58 +++++++++++++++++++ > gcc/config/riscv/riscv-protos.h | 2 + > gcc/config/riscv/riscv-v.cc | 57 ++++++++++++++++++ > .../riscv/rvv/autovec/vmulh-with-zero.cc | 19 ++++++ > .../riscv/rvv/base/binop_vx_constraint-121.c | 3 +- > 5 files changed, 138 insertions(+), 1 deletion(-) create mode 100644 > gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc > > diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec- > opt.md index 28040805b23..0d87572d1a4 100644 > --- a/gcc/config/riscv/autovec-opt.md > +++ b/gcc/config/riscv/autovec-opt.md > @@ -405,3 +405,61 @@ > "vmv.x.s\t%0,%1" > [(set_attr "type" "vimovvx") > (set_attr "mode" "<MODE>")]) > + > +;;; Simplify the mulh with 0 to move > +(define_split > + [(set (match_operand:VI_QHS 0 "register_operand") > + (if_then_else:VI_QHS > + (unspec:<VM> > + [(match_operand:<VM> 1 "vector_all_trues_mask_operand") > + (match_operand 5 "vector_length_operand") > + (match_operand 6 "const_int_operand") > + (match_operand 7 "const_int_operand") > + (match_operand 8 "const_int_operand") > + (reg:SI VL_REGNUM) > + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) > + (unspec:VI_QHS > + [(vec_duplicate:VI_QHS > + (match_operand:<VEL> 4 "reg_or_0_operand")) > + (match_operand:VI_QHS 3 "register_operand")] VMULH) > + (match_operand:VI_QHS 2 "vector_merge_operand") > + ))] > + "TARGET_VECTOR > + && rtx_equal_p (operands[4], CONST0_RTX (GET_MODE (operands[4])))" > + [(const_int 0)] > +{ > + riscv_vector::simplify_unspec_operations (operands, UNSPEC, > + <VMULH>, <MODE>mode) ; > + DONE; > +}) > + > +;;; Simplify vmadc + vadc with 0 to a simple move. > +(define_split > + [(set (match_operand:VI 0 "register_operand") > + (if_then_else:VI > + (unspec:<VM> > + [(match_operand 4 "vector_length_operand") > + (match_operand 5 "const_int_operand") > + (match_operand 6 "const_int_operand") > + (reg:SI VL_REGNUM) > + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) > + (unspec:VI > + [(match_operand:VI 2 "register_operand") > + (unspec:<VM> > + [(match_operand:VI 3 "register_operand") > + (unspec:<VM> > + [(match_operand 7 "vector_length_operand") > + (match_operand 8 "const_int_operand") > + (reg:SI VL_REGNUM) > + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) > + ] UNSPEC_OVERFLOW) > + ] UNSPEC_VADC) > + (match_operand:VI 1 "vector_merge_operand")))] > + "TARGET_VECTOR" > + [(const_int 0)] > +{ > + riscv_vector::simplify_unspec_operations (operands, PLUS, UNSPEC_VADC, > + <MODE>mode); > + DONE; > +}) > + > diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv- > protos.h index f052757cede..6a188a3d0ef 100644 > --- a/gcc/config/riscv/riscv-protos.h > +++ b/gcc/config/riscv/riscv-protos.h > @@ -228,6 +228,8 @@ bool neg_simm5_p (rtx); bool has_vi_variant_p > (rtx_code, rtx); void expand_vec_cmp (rtx, rtx_code, rtx, rtx); bool > expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool); > +void simplify_complement (rtx *, rtx_code, machine_mode); void > +simplify_unspec_operations (rtx*, rtx_code, int, machine_mode); > #endif > bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode, > bool, void (*)(rtx *, rtx)); > diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc > index 839a2c6ba71..9a9428ce18d 100644 > --- a/gcc/config/riscv/riscv-v.cc > +++ b/gcc/config/riscv/riscv-v.cc > @@ -2721,4 +2721,61 @@ expand_select_vl (rtx *ops) > emit_insn (gen_no_side_effects_vsetvl_rtx (rvv_mode, ops[0], ops[1])); } > > +void simplify_mulh (rtx *operands, > + machine_mode mode) > +{ > + rtx zero_operand = CONST0_RTX(GET_MODE(operands[4])); > + if (rtx_equal_p(operands[4], zero_operand)) > + { > + machine_mode mask_mode = riscv_vector::get_mask_mode (mode).require > (); > + emit_insn (gen_pred_mov (mode, operands[0], CONST1_RTX (mask_mode), > + RVV_VUNDEF (mode), > + CONST0_RTX (GET_MODE (operands[0])), > + operands[5], operands[6], operands[7], > + operands[8])); > + } > +} > + > +void simplify_vadc (rtx *operands, > + machine_mode mode) > +{ > + machine_mode mask_mode = riscv_vector::get_mask_mode (mode).require > +(); > + > + if (rtx_equal_p(operands[2], operands[3])) > + { > + emit_insn (gen_pred_mov (mode, operands[0], CONST1_RTX (mask_mode), > + operands[1], operands[2], operands[4], > + operands[5], operands[6], > + get_avl_type_rtx (riscv_vector::VLMAX))); > + } > +} > + > +void simplify_unspec_operations (rtx *operands, > + rtx_code code, > + int unspec, > + machine_mode mode) > +{ > + switch (unspec) > + { > + case UNSPEC_VMULHS: > + case UNSPEC_VMULHU: > + case UNSPEC_VMULHSU: > + simplify_mulh (operands, mode); > + break; > + > + case UNSPEC_VADC: > + case UNSPEC_VSBC: > + simplify_vadc(operands, mode); > + break; > + > + default: > + break; > + } > +} > + > +void simplify_complement (rtx *operands, > + rtx_code code, > + machine_mode mode) > +{ > +} > } // namespace riscv_vector > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc > new file mode 100644 > index 00000000000..6e4a3d62bc0 > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc > @@ -0,0 +1,19 @@ > +/* { dg-do compile } */ > +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */ > + > +#include "riscv_vector.h" > + > +#define VMULH_WITH_LMUL(X) \ > + vint32m##X##_t shortcut_for_riscv_vmulh_case_##X (vint32m##X##_t v1,\ > + size_t vl) { \ > + return __riscv_vmulh_vx_i32m ##X (v1, 0, vl); \ > + } > + > + > +VMULH_WITH_LMUL (1) > +VMULH_WITH_LMUL (2) > +VMULH_WITH_LMUL (4) > +VMULH_WITH_LMUL (8) > +VMULH_WITH_LMUL (f2) > + > +/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 5} */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint- > 121.c b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c > index 4d2de91bc14..d1473274137 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c > @@ -50,6 +50,7 @@ void f6 (void * in, void *out, int32_t x) > __riscv_vse64_v_i64m1 (out, v3, 4); } > > -/* { dg-final { scan-assembler-times {vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero} > 2 } } */ > +/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 1 } } */ > +/* { dg-final { scan-assembler-times > +{vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 1 } } */ > /* { dg-final { scan-assembler-times {vdiv\.vx\s+v[0-9]+,\s*v[0-9]+,zero} > 2 } } */ > /* { dg-final { scan-assembler-times {vrem\.vx\s+v[0-9]+,\s*v[0-9]+,zero} > 2 } } */ > -- > 2.41.0
> This is a draft patch. I would like to explain it's hard to make the > simplify generic and ask for some help. > > There're 2 categories we need to optimize. > > - The op in optab such as div / 1. > - The unspec operation such as mulh * 0, (vadc+vmadc) + 0. > > Especially for the unspec operation, I found we need to write one by > one to match the special pattern. Seems there's no way to write a > generic pattern that will match mulh, (vadc+vmadc), sll... This way > is too complicated and not so elegant because need to write so much > md patterns. > > Do you have any ideas? Yes, it's cumbersome having to add the patterns individually and it would be nicer to have the middle end optimize for us. However, adding new rtl expressions, especially generic ones that are useful for others and the respective optimizations is a tedious process as well. Still, just recently Roger Sayle added bitreverse and copysign. You can refer to his patch as well as the follow-up ones to get an idea of what would need to be done. ("Add RTX codes for BITREVERSE and COPYSIGN") So if we have few patterns that are really performance critical (like for some benchmark) my take is to add them in a similar way you were proposing but I would advise against using this excessively. Is the mulh case somehow common or critical? Regards Robin
On 7/28/23 06:31, Robin Dapp via Gcc-patches wrote: >> This is a draft patch. I would like to explain it's hard to make the >> simplify generic and ask for some help. >> >> There're 2 categories we need to optimize. >> >> - The op in optab such as div / 1. >> - The unspec operation such as mulh * 0, (vadc+vmadc) + 0. >> >> Especially for the unspec operation, I found we need to write one by >> one to match the special pattern. Seems there's no way to write a >> generic pattern that will match mulh, (vadc+vmadc), sll... This way >> is too complicated and not so elegant because need to write so much >> md patterns. >> >> Do you have any ideas? > > Yes, it's cumbersome having to add the patterns individually > and it would be nicer to have the middle end optimize for us. > > However, adding new rtl expressions, especially generic ones that > are useful for others and the respective optimizations is a tedious > process as well. Still, just recently Roger Sayle added bitreverse > and copysign. You can refer to his patch as well as the follow-up > ones to get an idea of what would need to be done. > ("Add RTX codes for BITREVERSE and COPYSIGN") > > So if we have few patterns that are really performance critical > (like for some benchmark) my take is to add them in a similar way you > were proposing but I would advise against using this excessively. > Is the mulh case somehow common or critical? Well, I would actually back up even further. What were the circumstances that led to the mulh with a zero operand? That would tend to be an indicator of a problem earlier. Perhaps in the gimple pipeline or the gimple->rtl conversion. I'd be a bit surprised to see a const0_rtx propagate in during the RTL pipeline, I guess it's possible, but I'd expect it to be relatively rare. The one case I could see happening would be cases from the builtin apis... Of course one might call that user error ;-) jeff
Thanks your comments, Jeff and Robin > > Is the mulh case somehow common or critical? > Well, I would actually back up even further. What were the > circumstances that led to the mulh with a zero operand? I think you both mentioned why should we add the mulh * 0 simplify. Unfortunately, I have no such a benchmark to explain the criticalness. We found there're some cases that exists in simplify_binary_operation in simplify-rtx.cc but not working for RISC-V backend. For example, - mult * 0 exists, but RISC-V has additional mulh * 0 - add + 0 / sub - 0 exists, but RISC-V has additional (madc + adc) + 0 - ... So we want to do some complement to make the simplify can cover more cases. That's the basic idea why we do these shortcut optimizations. > > However, adding new rtl expressions, especially generic ones that are > > useful for others and the respective optimizations is a tedious > > process as well. Still, just recently Roger Sayle added bitreverse > > and copysign. You can refer to his patch as well as the follow-up > > ones to get an idea of what would need to be done. > > ("Add RTX codes for BITREVERSE and COPYSIGN") Great advise. I'll have a check for the generic operations whether they can be implemented by this patch's style. It seems that we have to write specific pattern for the unspec relative insns, unfortunately. Thanks, Yanzhang > -----Original Message----- > From: Jeff Law <jeffreyalaw@gmail.com> > Sent: Saturday, July 29, 2023 7:07 AM > To: Robin Dapp <rdapp.gcc@gmail.com>; Wang, Yanzhang > <yanzhang.wang@intel.com>; gcc-patches@gcc.gnu.org > Cc: juzhe.zhong@rivai.ai; kito.cheng@sifive.com; Li, Pan2 > <pan2.li@intel.com> > Subject: Re: [PATCH v2] RISC-V: convert the mulh with 0 to mov 0 to the reg. > > > > On 7/28/23 06:31, Robin Dapp via Gcc-patches wrote: > >> This is a draft patch. I would like to explain it's hard to make the > >> simplify generic and ask for some help. > >> > >> There're 2 categories we need to optimize. > >> > >> - The op in optab such as div / 1. > >> - The unspec operation such as mulh * 0, (vadc+vmadc) + 0. > >> > >> Especially for the unspec operation, I found we need to write one by > >> one to match the special pattern. Seems there's no way to write a > >> generic pattern that will match mulh, (vadc+vmadc), sll... This way > >> is too complicated and not so elegant because need to write so much > >> md patterns. > >> > >> Do you have any ideas? > > > > Yes, it's cumbersome having to add the patterns individually and it > > would be nicer to have the middle end optimize for us. > > > > However, adding new rtl expressions, especially generic ones that are > > useful for others and the respective optimizations is a tedious > > process as well. Still, just recently Roger Sayle added bitreverse > > and copysign. You can refer to his patch as well as the follow-up > > ones to get an idea of what would need to be done. > > ("Add RTX codes for BITREVERSE and COPYSIGN") > > > > So if we have few patterns that are really performance critical (like > > for some benchmark) my take is to add them in a similar way you were > > proposing but I would advise against using this excessively. > > Is the mulh case somehow common or critical? > Well, I would actually back up even further. What were the > circumstances that led to the mulh with a zero operand? That would > tend to be an indicator of a problem earlier. Perhaps in the gimple > pipeline or the gimple->rtl conversion. I'd be a bit surprised to see a > const0_rtx propagate in during the RTL pipeline, I guess it's possible, but > I'd expect it to be relatively rare. > > The one case I could see happening would be cases from the builtin apis... > Of course one might call that user error ;-) > > > jeff
On 7/31/23 06:14, Wang, Yanzhang wrote: > Thanks your comments, Jeff and Robin > >>> Is the mulh case somehow common or critical? >> Well, I would actually back up even further. What were the >> circumstances that led to the mulh with a zero operand? > > I think you both mentioned why should we add the mulh * 0 simplify. > Unfortunately, I have no such a benchmark to explain the criticalness. We found > there're some cases that exists in simplify_binary_operation in simplify-rtx.cc > but not working for RISC-V backend. For example, > > - mult * 0 exists, but RISC-V has additional mulh * 0 > - add + 0 / sub - 0 exists, but RISC-V has additional (madc + adc) + 0 > - ... > > So we want to do some complement to make the simplify can cover more cases. > That's the basic idea why we do these shortcut optimizations. But the right place to handle this stuff is probably in the generic code, with a few exceptions. So even if you don't have a benchmark, just having non-intrinsic/builtin code which triggers these cases would be helpful so that we can figure out the best place to fix this problem. What I want to avoid is adding a bunch of patterns in the RISC-V backend for cases that are better handled by generic optimization passes. Jeff
diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md index 28040805b23..0d87572d1a4 100644 --- a/gcc/config/riscv/autovec-opt.md +++ b/gcc/config/riscv/autovec-opt.md @@ -405,3 +405,61 @@ "vmv.x.s\t%0,%1" [(set_attr "type" "vimovvx") (set_attr "mode" "<MODE>")]) + +;;; Simplify the mulh with 0 to move +(define_split + [(set (match_operand:VI_QHS 0 "register_operand") + (if_then_else:VI_QHS + (unspec:<VM> + [(match_operand:<VM> 1 "vector_all_trues_mask_operand") + (match_operand 5 "vector_length_operand") + (match_operand 6 "const_int_operand") + (match_operand 7 "const_int_operand") + (match_operand 8 "const_int_operand") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec:VI_QHS + [(vec_duplicate:VI_QHS + (match_operand:<VEL> 4 "reg_or_0_operand")) + (match_operand:VI_QHS 3 "register_operand")] VMULH) + (match_operand:VI_QHS 2 "vector_merge_operand") + ))] + "TARGET_VECTOR + && rtx_equal_p (operands[4], CONST0_RTX (GET_MODE (operands[4])))" + [(const_int 0)] +{ + riscv_vector::simplify_unspec_operations (operands, UNSPEC, + <VMULH>, <MODE>mode) ; + DONE; +}) + +;;; Simplify vmadc + vadc with 0 to a simple move. +(define_split + [(set (match_operand:VI 0 "register_operand") + (if_then_else:VI + (unspec:<VM> + [(match_operand 4 "vector_length_operand") + (match_operand 5 "const_int_operand") + (match_operand 6 "const_int_operand") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + (unspec:VI + [(match_operand:VI 2 "register_operand") + (unspec:<VM> + [(match_operand:VI 3 "register_operand") + (unspec:<VM> + [(match_operand 7 "vector_length_operand") + (match_operand 8 "const_int_operand") + (reg:SI VL_REGNUM) + (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE) + ] UNSPEC_OVERFLOW) + ] UNSPEC_VADC) + (match_operand:VI 1 "vector_merge_operand")))] + "TARGET_VECTOR" + [(const_int 0)] +{ + riscv_vector::simplify_unspec_operations (operands, PLUS, UNSPEC_VADC, + <MODE>mode); + DONE; +}) + diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h index f052757cede..6a188a3d0ef 100644 --- a/gcc/config/riscv/riscv-protos.h +++ b/gcc/config/riscv/riscv-protos.h @@ -228,6 +228,8 @@ bool neg_simm5_p (rtx); bool has_vi_variant_p (rtx_code, rtx); void expand_vec_cmp (rtx, rtx_code, rtx, rtx); bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool); +void simplify_complement (rtx *, rtx_code, machine_mode); +void simplify_unspec_operations (rtx*, rtx_code, int, machine_mode); #endif bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode, bool, void (*)(rtx *, rtx)); diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc index 839a2c6ba71..9a9428ce18d 100644 --- a/gcc/config/riscv/riscv-v.cc +++ b/gcc/config/riscv/riscv-v.cc @@ -2721,4 +2721,61 @@ expand_select_vl (rtx *ops) emit_insn (gen_no_side_effects_vsetvl_rtx (rvv_mode, ops[0], ops[1])); } +void simplify_mulh (rtx *operands, + machine_mode mode) +{ + rtx zero_operand = CONST0_RTX(GET_MODE(operands[4])); + if (rtx_equal_p(operands[4], zero_operand)) + { + machine_mode mask_mode = riscv_vector::get_mask_mode (mode).require (); + emit_insn (gen_pred_mov (mode, operands[0], CONST1_RTX (mask_mode), + RVV_VUNDEF (mode), + CONST0_RTX (GET_MODE (operands[0])), + operands[5], operands[6], operands[7], + operands[8])); + } +} + +void simplify_vadc (rtx *operands, + machine_mode mode) +{ + machine_mode mask_mode = riscv_vector::get_mask_mode (mode).require (); + + if (rtx_equal_p(operands[2], operands[3])) + { + emit_insn (gen_pred_mov (mode, operands[0], CONST1_RTX (mask_mode), + operands[1], operands[2], operands[4], + operands[5], operands[6], + get_avl_type_rtx (riscv_vector::VLMAX))); + } +} + +void simplify_unspec_operations (rtx *operands, + rtx_code code, + int unspec, + machine_mode mode) +{ + switch (unspec) + { + case UNSPEC_VMULHS: + case UNSPEC_VMULHU: + case UNSPEC_VMULHSU: + simplify_mulh (operands, mode); + break; + + case UNSPEC_VADC: + case UNSPEC_VSBC: + simplify_vadc(operands, mode); + break; + + default: + break; + } +} + +void simplify_complement (rtx *operands, + rtx_code code, + machine_mode mode) +{ +} } // namespace riscv_vector diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc new file mode 100644 index 00000000000..6e4a3d62bc0 --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc @@ -0,0 +1,19 @@ +/* { dg-do compile } */ +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */ + +#include "riscv_vector.h" + +#define VMULH_WITH_LMUL(X) \ + vint32m##X##_t shortcut_for_riscv_vmulh_case_##X (vint32m##X##_t v1,\ + size_t vl) { \ + return __riscv_vmulh_vx_i32m ##X (v1, 0, vl); \ + } + + +VMULH_WITH_LMUL (1) +VMULH_WITH_LMUL (2) +VMULH_WITH_LMUL (4) +VMULH_WITH_LMUL (8) +VMULH_WITH_LMUL (f2) + +/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 5} */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c index 4d2de91bc14..d1473274137 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c @@ -50,6 +50,7 @@ void f6 (void * in, void *out, int32_t x) __riscv_vse64_v_i64m1 (out, v3, 4); } -/* { dg-final { scan-assembler-times {vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } } */ +/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 1 } } */ +/* { dg-final { scan-assembler-times {vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 1 } } */ /* { dg-final { scan-assembler-times {vdiv\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } } */ /* { dg-final { scan-assembler-times {vrem\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } } */