diff mbox series

[v2] RISC-V: convert the mulh with 0 to mov 0 to the reg.

Message ID 20230728115004.3071397-1-yanzhang.wang@intel.com
State New
Headers show
Series [v2] RISC-V: convert the mulh with 0 to mov 0 to the reg. | expand

Commit Message

Li, Pan2 via Gcc-patches July 28, 2023, 11:50 a.m. UTC
From: Yanzhang Wang <yanzhang.wang@intel.com>

This patch will optimize the below mulh example,

vint32m1_t shortcut_for_riscv_vmulh_case_0(vint32m1_t v1, size_t vl) {
  return __riscv_vmulh_vx_i32m1(v1, 0, vl);
}

from mulh pattern

vsetvli   zero, a2, e32, m1, ta, ma
vmulh.vx  v24, v24, zero
vs1r.v    v24, 0(a0)

to below vmv.

vsetvli zero,a2,e32,m1,ta,ma
vmv.v.i v1,0
vs1r.v  v1,0(a0)

It will elimate the mul with const 0 instruction to the simple mov
instruction.

Signed-off-by: Yanzhang Wang <yanzhang.wang@intel.com>

gcc/ChangeLog:

	* config/riscv/autovec-opt.md: Add a split pattern.

gcc/testsuite/ChangeLog:

	* gcc.target/riscv/rvv/base/binop_vx_constraint-121.c: The mul
	  with 0 will be simplified to vmv.v.i.
	* gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc: New test.
---
 gcc/config/riscv/autovec-opt.md               | 58 +++++++++++++++++++
 gcc/config/riscv/riscv-protos.h               |  2 +
 gcc/config/riscv/riscv-v.cc                   | 57 ++++++++++++++++++
 .../riscv/rvv/autovec/vmulh-with-zero.cc      | 19 ++++++
 .../riscv/rvv/base/binop_vx_constraint-121.c  |  3 +-
 5 files changed, 138 insertions(+), 1 deletion(-)
 create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc

Comments

Kito Cheng July 28, 2023, noon UTC | #1
<yanzhang.wang@intel.com> 於 2023年7月28日 週五 19:50 寫道:

> From: Yanzhang Wang <yanzhang.wang@intel.com>
>
> This patch will optimize the below mulh example,
>
> vint32m1_t shortcut_for_riscv_vmulh_case_0(vint32m1_t v1, size_t vl) {
>   return __riscv_vmulh_vx_i32m1(v1, 0, vl);
> }
>
> from mulh pattern
>
> vsetvli   zero, a2, e32, m1, ta, ma
> vmulh.vx  v24, v24, zero
> vs1r.v    v24, 0(a0)
>
> to below vmv.
>
> vsetvli zero,a2,e32,m1,ta,ma
> vmv.v.i v1,0
> vs1r.v  v1,0(a0)
>
> It will elimate the mul with const 0 instruction to the simple mov
> instruction.
>
> Signed-off-by: Yanzhang Wang <yanzhang.wang@intel.com>
>
> gcc/ChangeLog:
>
>         * config/riscv/autovec-opt.md: Add a split pattern.
>
> gcc/testsuite/ChangeLog:
>
>         * gcc.target/riscv/rvv/base/binop_vx_constraint-121.c: The mul
>           with 0 will be simplified to vmv.v.i.
>         * gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc: New test.
> ---
>  gcc/config/riscv/autovec-opt.md               | 58 +++++++++++++++++++
>  gcc/config/riscv/riscv-protos.h               |  2 +
>  gcc/config/riscv/riscv-v.cc                   | 57 ++++++++++++++++++
>  .../riscv/rvv/autovec/vmulh-with-zero.cc      | 19 ++++++
>  .../riscv/rvv/base/binop_vx_constraint-121.c  |  3 +-
>  5 files changed, 138 insertions(+), 1 deletion(-)
>  create mode 100644
> gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
>
> diff --git a/gcc/config/riscv/autovec-opt.md
> b/gcc/config/riscv/autovec-opt.md
> index 28040805b23..0d87572d1a4 100644
> --- a/gcc/config/riscv/autovec-opt.md
> +++ b/gcc/config/riscv/autovec-opt.md
> @@ -405,3 +405,61 @@
>    "vmv.x.s\t%0,%1"
>    [(set_attr "type" "vimovvx")
>     (set_attr "mode" "<MODE>")])
> +
> +;;; Simplify the mulh with 0 to move
> +(define_split
> +  [(set (match_operand:VI_QHS 0 "register_operand")
> +     (if_then_else:VI_QHS
> +       (unspec:<VM>
> +        [(match_operand:<VM> 1 "vector_all_trues_mask_operand")
> +          (match_operand 5 "vector_length_operand")
> +          (match_operand 6 "const_int_operand")
> +          (match_operand 7 "const_int_operand")
> +          (match_operand 8 "const_int_operand")
> +          (reg:SI VL_REGNUM)
> +          (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
> +       (unspec:VI_QHS
> +        [(vec_duplicate:VI_QHS
> +           (match_operand:<VEL> 4 "reg_or_0_operand"))
>

This could be just a const int zero rather than a match operand

+          (match_operand:VI_QHS 3 "register_operand")] VMULH)
> +       (match_operand:VI_QHS 2 "vector_merge_operand")
> +       ))]
> +  "TARGET_VECTOR
> +     && rtx_equal_p (operands[4], CONST0_RTX (GET_MODE (operands[4])))"
>

Then no need to check here.


+  [(const_int 0)]
> +{
> +  riscv_vector::simplify_unspec_operations (operands, UNSPEC,
> +                                            <VMULH>, <MODE>mode) ;
> +  DONE;
> +})
> +
> +;;; Simplify vmadc + vadc with 0 to a simple move.
> +(define_split
> +  [(set (match_operand:VI 0 "register_operand")
> +     (if_then_else:VI
> +       (unspec:<VM>
> +        [(match_operand 4 "vector_length_operand")
> +          (match_operand 5 "const_int_operand")
> +          (match_operand 6 "const_int_operand")
> +          (reg:SI VL_REGNUM)
> +          (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
> +       (unspec:VI
> +        [(match_operand:VI 2 "register_operand")
> +          (unspec:<VM>
> +            [(match_operand:VI 3 "register_operand")
> +              (unspec:<VM>
> +                [(match_operand 7 "vector_length_operand")
> +                  (match_operand 8 "const_int_operand")
> +                  (reg:SI VL_REGNUM)
> +                  (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
> +              ] UNSPEC_OVERFLOW)
> +          ] UNSPEC_VADC)
> +       (match_operand:VI 1 "vector_merge_operand")))]
> +  "TARGET_VECTOR"
> +  [(const_int 0)]
> +{
> +  riscv_vector::simplify_unspec_operations (operands, PLUS, UNSPEC_VADC,
> +                                           <MODE>mode);
> +  DONE;
> +})
> +
> diff --git a/gcc/config/riscv/riscv-protos.h
> b/gcc/config/riscv/riscv-protos.h
> index f052757cede..6a188a3d0ef 100644
> --- a/gcc/config/riscv/riscv-protos.h
> +++ b/gcc/config/riscv/riscv-protos.h
> @@ -228,6 +228,8 @@ bool neg_simm5_p (rtx);
>  bool has_vi_variant_p (rtx_code, rtx);
>  void expand_vec_cmp (rtx, rtx_code, rtx, rtx);
>  bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
> +void simplify_complement (rtx *, rtx_code, machine_mode);
> +void simplify_unspec_operations (rtx*, rtx_code, int, machine_mode);
>  #endif
>  bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
>                           bool, void (*)(rtx *, rtx));
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index 839a2c6ba71..9a9428ce18d 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -2721,4 +2721,61 @@ expand_select_vl (rtx *ops)
>    emit_insn (gen_no_side_effects_vsetvl_rtx (rvv_mode, ops[0], ops[1]));
>  }
>
> +void simplify_mulh (rtx *operands,
> +                   machine_mode mode)
> +{
> +  rtx zero_operand = CONST0_RTX(GET_MODE(operands[4]));
> +  if (rtx_equal_p(operands[4], zero_operand))
> +    {
> +      machine_mode mask_mode = riscv_vector::get_mask_mode (mode).require
> ();
> +      emit_insn (gen_pred_mov (mode, operands[0], CONST1_RTX (mask_mode),
> +                              RVV_VUNDEF (mode),
> +                              CONST0_RTX (GET_MODE (operands[0])),
> +                              operands[5], operands[6], operands[7],
> +                              operands[8]));
> +    }
> +}
> +
> +void simplify_vadc (rtx *operands,
> +                  machine_mode mode)
> +{
> +  machine_mode mask_mode = riscv_vector::get_mask_mode (mode).require ();
> +
> +  if (rtx_equal_p(operands[2], operands[3]))
> +    {
> +      emit_insn (gen_pred_mov (mode, operands[0], CONST1_RTX (mask_mode),
> +                              operands[1], operands[2], operands[4],
> +                              operands[5], operands[6],
> +                              get_avl_type_rtx (riscv_vector::VLMAX)));
> +    }
> +}
> +
> +void simplify_unspec_operations (rtx *operands,
> +                                rtx_code code,
> +                                int unspec,
> +                                machine_mode mode)
> +{
> +  switch (unspec)
> +    {
> +    case UNSPEC_VMULHS:
> +    case UNSPEC_VMULHU:
> +    case UNSPEC_VMULHSU:
> +      simplify_mulh (operands, mode);
> +      break;
> +
> +    case UNSPEC_VADC:
> +    case UNSPEC_VSBC:
> +      simplify_vadc(operands, mode);
> +      break;
> +
> +    default:
> +      break;
> +    }
> +}
> +
> +void simplify_complement (rtx *operands,
> +                         rtx_code code,
> +                         machine_mode mode)
> +{
> +}
>  } // namespace riscv_vector
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
> new file mode 100644
> index 00000000000..6e4a3d62bc0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
> +
> +#include "riscv_vector.h"
> +
> +#define VMULH_WITH_LMUL(X) \
> +  vint32m##X##_t shortcut_for_riscv_vmulh_case_##X (vint32m##X##_t v1,\
> +                                                    size_t vl) {      \
> +    return __riscv_vmulh_vx_i32m ##X (v1, 0, vl);
>      \
> +  }
> +
> +
> +VMULH_WITH_LMUL (1)
> +VMULH_WITH_LMUL (2)
> +VMULH_WITH_LMUL (4)
> +VMULH_WITH_LMUL (8)
> +VMULH_WITH_LMUL (f2)
> +
> +/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 5} */
> diff --git
> a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
> b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
> index 4d2de91bc14..d1473274137 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
> @@ -50,6 +50,7 @@ void f6 (void * in, void *out, int32_t x)
>      __riscv_vse64_v_i64m1 (out, v3, 4);
>  }
>
> -/* { dg-final { scan-assembler-times
> {vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } } */
> +/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 1 } } */
> +/* { dg-final { scan-assembler-times
> {vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 1 } } */
>  /* { dg-final { scan-assembler-times {vdiv\.vx\s+v[0-9]+,\s*v[0-9]+,zero}
> 2 } } */
>  /* { dg-final { scan-assembler-times {vrem\.vx\s+v[0-9]+,\s*v[0-9]+,zero}
> 2 } } */
> --
> 2.41.0
>
>
Li, Pan2 via Gcc-patches July 28, 2023, noon UTC | #2
This is a draft patch. I would like to explain it's hard to make the
simplify generic and ask for some help.

There're 2 categories we need to optimize.

- The op in optab such as div / 1.
- The unspec operation such as mulh * 0, (vadc+vmadc) + 0.

Especially for the unspec operation, I found we need to write one by
one to match the special pattern. Seems there's no way to write a
generic pattern that will match mulh, (vadc+vmadc), sll... This way
is too complicated and not so elegant because need to write so much
md patterns.

Do you have any ideas?

> -----Original Message-----
> From: Wang, Yanzhang <yanzhang.wang@intel.com>
> Sent: Friday, July 28, 2023 7:50 PM
> To: gcc-patches@gcc.gnu.org
> Cc: juzhe.zhong@rivai.ai; kito.cheng@sifive.com; rdapp.gcc@gmail.com; Li,
> Pan2 <pan2.li@intel.com>; Wang, Yanzhang <yanzhang.wang@intel.com>
> Subject: [PATCH v2] RISC-V: convert the mulh with 0 to mov 0 to the reg.
> 
> From: Yanzhang Wang <yanzhang.wang@intel.com>
> 
> This patch will optimize the below mulh example,
> 
> vint32m1_t shortcut_for_riscv_vmulh_case_0(vint32m1_t v1, size_t vl) {
>   return __riscv_vmulh_vx_i32m1(v1, 0, vl); }
> 
> from mulh pattern
> 
> vsetvli   zero, a2, e32, m1, ta, ma
> vmulh.vx  v24, v24, zero
> vs1r.v    v24, 0(a0)
> 
> to below vmv.
> 
> vsetvli zero,a2,e32,m1,ta,ma
> vmv.v.i v1,0
> vs1r.v  v1,0(a0)
> 
> It will elimate the mul with const 0 instruction to the simple mov
> instruction.
> 
> Signed-off-by: Yanzhang Wang <yanzhang.wang@intel.com>
> 
> gcc/ChangeLog:
> 
> 	* config/riscv/autovec-opt.md: Add a split pattern.
> 
> gcc/testsuite/ChangeLog:
> 
> 	* gcc.target/riscv/rvv/base/binop_vx_constraint-121.c: The mul
> 	  with 0 will be simplified to vmv.v.i.
> 	* gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc: New test.
> ---
>  gcc/config/riscv/autovec-opt.md               | 58 +++++++++++++++++++
>  gcc/config/riscv/riscv-protos.h               |  2 +
>  gcc/config/riscv/riscv-v.cc                   | 57 ++++++++++++++++++
>  .../riscv/rvv/autovec/vmulh-with-zero.cc      | 19 ++++++
>  .../riscv/rvv/base/binop_vx_constraint-121.c  |  3 +-
>  5 files changed, 138 insertions(+), 1 deletion(-)  create mode 100644
> gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
> 
> diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-
> opt.md index 28040805b23..0d87572d1a4 100644
> --- a/gcc/config/riscv/autovec-opt.md
> +++ b/gcc/config/riscv/autovec-opt.md
> @@ -405,3 +405,61 @@
>    "vmv.x.s\t%0,%1"
>    [(set_attr "type" "vimovvx")
>     (set_attr "mode" "<MODE>")])
> +
> +;;; Simplify the mulh with 0 to move
> +(define_split
> +  [(set (match_operand:VI_QHS 0 "register_operand")
> +     (if_then_else:VI_QHS
> +       (unspec:<VM>
> +	 [(match_operand:<VM> 1 "vector_all_trues_mask_operand")
> +	   (match_operand 5 "vector_length_operand")
> +	   (match_operand 6 "const_int_operand")
> +	   (match_operand 7 "const_int_operand")
> +	   (match_operand 8 "const_int_operand")
> +	   (reg:SI VL_REGNUM)
> +	   (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
> +       (unspec:VI_QHS
> +	 [(vec_duplicate:VI_QHS
> +	    (match_operand:<VEL> 4 "reg_or_0_operand"))
> +	   (match_operand:VI_QHS 3 "register_operand")] VMULH)
> +       (match_operand:VI_QHS 2 "vector_merge_operand")
> +       ))]
> +  "TARGET_VECTOR
> +     && rtx_equal_p (operands[4], CONST0_RTX (GET_MODE (operands[4])))"
> +  [(const_int 0)]
> +{
> +  riscv_vector::simplify_unspec_operations (operands, UNSPEC,
> +					     <VMULH>, <MODE>mode) ;
> +  DONE;
> +})
> +
> +;;; Simplify vmadc + vadc with 0 to a simple move.
> +(define_split
> +  [(set (match_operand:VI 0 "register_operand")
> +     (if_then_else:VI
> +       (unspec:<VM>
> +	 [(match_operand 4 "vector_length_operand")
> +	   (match_operand 5 "const_int_operand")
> +	   (match_operand 6 "const_int_operand")
> +	   (reg:SI VL_REGNUM)
> +	   (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
> +       (unspec:VI
> +	 [(match_operand:VI 2 "register_operand")
> +	   (unspec:<VM>
> +	     [(match_operand:VI 3 "register_operand")
> +	       (unspec:<VM>
> +		 [(match_operand 7 "vector_length_operand")
> +		   (match_operand 8 "const_int_operand")
> +		   (reg:SI VL_REGNUM)
> +		   (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
> +	       ] UNSPEC_OVERFLOW)
> +	   ] UNSPEC_VADC)
> +       (match_operand:VI 1 "vector_merge_operand")))]
> +  "TARGET_VECTOR"
> +  [(const_int 0)]
> +{
> +  riscv_vector::simplify_unspec_operations (operands, PLUS, UNSPEC_VADC,
> +					    <MODE>mode);
> +  DONE;
> +})
> +
> diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-
> protos.h index f052757cede..6a188a3d0ef 100644
> --- a/gcc/config/riscv/riscv-protos.h
> +++ b/gcc/config/riscv/riscv-protos.h
> @@ -228,6 +228,8 @@ bool neg_simm5_p (rtx);  bool has_vi_variant_p
> (rtx_code, rtx);  void expand_vec_cmp (rtx, rtx_code, rtx, rtx);  bool
> expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
> +void simplify_complement (rtx *, rtx_code, machine_mode); void
> +simplify_unspec_operations (rtx*, rtx_code, int, machine_mode);
>  #endif
>  bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
>  			  bool, void (*)(rtx *, rtx));
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index 839a2c6ba71..9a9428ce18d 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -2721,4 +2721,61 @@ expand_select_vl (rtx *ops)
>    emit_insn (gen_no_side_effects_vsetvl_rtx (rvv_mode, ops[0], ops[1]));  }
> 
> +void simplify_mulh (rtx *operands,
> +		    machine_mode mode)
> +{
> +  rtx zero_operand = CONST0_RTX(GET_MODE(operands[4]));
> +  if (rtx_equal_p(operands[4], zero_operand))
> +    {
> +      machine_mode mask_mode = riscv_vector::get_mask_mode (mode).require
> ();
> +      emit_insn (gen_pred_mov (mode, operands[0], CONST1_RTX (mask_mode),
> +			       RVV_VUNDEF (mode),
> +			       CONST0_RTX (GET_MODE (operands[0])),
> +			       operands[5], operands[6], operands[7],
> +			       operands[8]));
> +    }
> +}
> +
> +void simplify_vadc (rtx *operands,
> +		   machine_mode mode)
> +{
> +  machine_mode mask_mode = riscv_vector::get_mask_mode (mode).require
> +();
> +
> +  if (rtx_equal_p(operands[2], operands[3]))
> +    {
> +      emit_insn (gen_pred_mov (mode, operands[0], CONST1_RTX (mask_mode),
> +			       operands[1], operands[2], operands[4],
> +			       operands[5], operands[6],
> +			       get_avl_type_rtx (riscv_vector::VLMAX)));
> +    }
> +}
> +
> +void simplify_unspec_operations (rtx *operands,
> +				 rtx_code code,
> +				 int unspec,
> +				 machine_mode mode)
> +{
> +  switch (unspec)
> +    {
> +    case UNSPEC_VMULHS:
> +    case UNSPEC_VMULHU:
> +    case UNSPEC_VMULHSU:
> +      simplify_mulh (operands, mode);
> +      break;
> +
> +    case UNSPEC_VADC:
> +    case UNSPEC_VSBC:
> +      simplify_vadc(operands, mode);
> +      break;
> +
> +    default:
> +      break;
> +    }
> +}
> +
> +void simplify_complement (rtx *operands,
> +			  rtx_code code,
> +			  machine_mode mode)
> +{
> +}
>  } // namespace riscv_vector
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
> new file mode 100644
> index 00000000000..6e4a3d62bc0
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
> @@ -0,0 +1,19 @@
> +/* { dg-do compile } */
> +/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
> +
> +#include "riscv_vector.h"
> +
> +#define VMULH_WITH_LMUL(X) \
> +  vint32m##X##_t shortcut_for_riscv_vmulh_case_##X (vint32m##X##_t v1,\
> +						     size_t vl) {      \
> +    return __riscv_vmulh_vx_i32m ##X (v1, 0, vl);              	       \
> +  }
> +
> +
> +VMULH_WITH_LMUL (1)
> +VMULH_WITH_LMUL (2)
> +VMULH_WITH_LMUL (4)
> +VMULH_WITH_LMUL (8)
> +VMULH_WITH_LMUL (f2)
> +
> +/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 5} */
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-
> 121.c b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
> index 4d2de91bc14..d1473274137 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
> @@ -50,6 +50,7 @@ void f6 (void * in, void *out, int32_t x)
>      __riscv_vse64_v_i64m1 (out, v3, 4);  }
> 
> -/* { dg-final { scan-assembler-times {vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero}
> 2 } } */
> +/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 1 } } */
> +/* { dg-final { scan-assembler-times
> +{vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 1 } } */
>  /* { dg-final { scan-assembler-times {vdiv\.vx\s+v[0-9]+,\s*v[0-9]+,zero}
> 2 } } */
>  /* { dg-final { scan-assembler-times {vrem\.vx\s+v[0-9]+,\s*v[0-9]+,zero}
> 2 } } */
> --
> 2.41.0
Robin Dapp July 28, 2023, 12:31 p.m. UTC | #3
> This is a draft patch. I would like to explain it's hard to make the
> simplify generic and ask for some help.
> 
> There're 2 categories we need to optimize.
> 
> - The op in optab such as div / 1.
> - The unspec operation such as mulh * 0, (vadc+vmadc) + 0.
> 
> Especially for the unspec operation, I found we need to write one by
> one to match the special pattern. Seems there's no way to write a
> generic pattern that will match mulh, (vadc+vmadc), sll... This way
> is too complicated and not so elegant because need to write so much
> md patterns.
> 
> Do you have any ideas?

Yes, it's cumbersome having to add the patterns individually
and it would be nicer to have the middle end optimize for us.

However, adding new rtl expressions, especially generic ones that
are useful for others and the respective optimizations is a tedious
process as well.  Still, just recently Roger Sayle added bitreverse
and copysign.  You can refer to his patch as well as the follow-up
ones to get an idea of what would need to be done.
("Add RTX codes for BITREVERSE and COPYSIGN")

So if we have few patterns that are really performance critical
(like for some benchmark) my take is to add them in a similar way you
were proposing but I would advise against using this excessively.
Is the mulh case somehow common or critical?

Regards
 Robin
Jeff Law July 28, 2023, 11:07 p.m. UTC | #4
On 7/28/23 06:31, Robin Dapp via Gcc-patches wrote:
>> This is a draft patch. I would like to explain it's hard to make the
>> simplify generic and ask for some help.
>>
>> There're 2 categories we need to optimize.
>>
>> - The op in optab such as div / 1.
>> - The unspec operation such as mulh * 0, (vadc+vmadc) + 0.
>>
>> Especially for the unspec operation, I found we need to write one by
>> one to match the special pattern. Seems there's no way to write a
>> generic pattern that will match mulh, (vadc+vmadc), sll... This way
>> is too complicated and not so elegant because need to write so much
>> md patterns.
>>
>> Do you have any ideas?
> 
> Yes, it's cumbersome having to add the patterns individually
> and it would be nicer to have the middle end optimize for us.
> 
> However, adding new rtl expressions, especially generic ones that
> are useful for others and the respective optimizations is a tedious
> process as well.  Still, just recently Roger Sayle added bitreverse
> and copysign.  You can refer to his patch as well as the follow-up
> ones to get an idea of what would need to be done.
> ("Add RTX codes for BITREVERSE and COPYSIGN")
> 
> So if we have few patterns that are really performance critical
> (like for some benchmark) my take is to add them in a similar way you
> were proposing but I would advise against using this excessively.
> Is the mulh case somehow common or critical?
Well, I would actually back up even further.  What were the 
circumstances that led to the mulh with a zero operand?   That would 
tend to be an indicator of a problem earlier.  Perhaps in the gimple 
pipeline or the gimple->rtl conversion.  I'd be a bit surprised to see a 
const0_rtx propagate in during the RTL pipeline, I guess it's possible, 
but I'd expect it to be relatively rare.

The one case I could see happening would be cases from the builtin 
apis...  Of course one might call that user error ;-)


jeff
Li, Pan2 via Gcc-patches July 31, 2023, 12:14 p.m. UTC | #5
Thanks your comments, Jeff and Robin

> > Is the mulh case somehow common or critical?
> Well, I would actually back up even further.  What were the
> circumstances that led to the mulh with a zero operand?   

I think you both mentioned why should we add the mulh * 0 simplify.
Unfortunately, I have no such a benchmark to explain the criticalness. We found
there're some cases that exists in simplify_binary_operation in simplify-rtx.cc
but not working for RISC-V backend. For example,

- mult * 0 exists, but RISC-V has additional mulh * 0
- add + 0 / sub - 0 exists, but RISC-V has additional (madc + adc) + 0
- ...

So we want to do some complement to make the simplify can cover more cases.
That's the basic idea why we do these shortcut optimizations.

> > However, adding new rtl expressions, especially generic ones that are
> > useful for others and the respective optimizations is a tedious
> > process as well.  Still, just recently Roger Sayle added bitreverse
> > and copysign.  You can refer to his patch as well as the follow-up
> > ones to get an idea of what would need to be done.
> > ("Add RTX codes for BITREVERSE and COPYSIGN")

Great advise. I'll have a check for the generic operations whether they can
be implemented by this patch's style. It seems that we have to write specific
pattern for the unspec relative insns, unfortunately.

Thanks,
Yanzhang

> -----Original Message-----
> From: Jeff Law <jeffreyalaw@gmail.com>
> Sent: Saturday, July 29, 2023 7:07 AM
> To: Robin Dapp <rdapp.gcc@gmail.com>; Wang, Yanzhang
> <yanzhang.wang@intel.com>; gcc-patches@gcc.gnu.org
> Cc: juzhe.zhong@rivai.ai; kito.cheng@sifive.com; Li, Pan2
> <pan2.li@intel.com>
> Subject: Re: [PATCH v2] RISC-V: convert the mulh with 0 to mov 0 to the reg.
> 
> 
> 
> On 7/28/23 06:31, Robin Dapp via Gcc-patches wrote:
> >> This is a draft patch. I would like to explain it's hard to make the
> >> simplify generic and ask for some help.
> >>
> >> There're 2 categories we need to optimize.
> >>
> >> - The op in optab such as div / 1.
> >> - The unspec operation such as mulh * 0, (vadc+vmadc) + 0.
> >>
> >> Especially for the unspec operation, I found we need to write one by
> >> one to match the special pattern. Seems there's no way to write a
> >> generic pattern that will match mulh, (vadc+vmadc), sll... This way
> >> is too complicated and not so elegant because need to write so much
> >> md patterns.
> >>
> >> Do you have any ideas?
> >
> > Yes, it's cumbersome having to add the patterns individually and it
> > would be nicer to have the middle end optimize for us.
> >
> > However, adding new rtl expressions, especially generic ones that are
> > useful for others and the respective optimizations is a tedious
> > process as well.  Still, just recently Roger Sayle added bitreverse
> > and copysign.  You can refer to his patch as well as the follow-up
> > ones to get an idea of what would need to be done.
> > ("Add RTX codes for BITREVERSE and COPYSIGN")
> >
> > So if we have few patterns that are really performance critical (like
> > for some benchmark) my take is to add them in a similar way you were
> > proposing but I would advise against using this excessively.
> > Is the mulh case somehow common or critical?
> Well, I would actually back up even further.  What were the
> circumstances that led to the mulh with a zero operand?   That would
> tend to be an indicator of a problem earlier.  Perhaps in the gimple
> pipeline or the gimple->rtl conversion.  I'd be a bit surprised to see a
> const0_rtx propagate in during the RTL pipeline, I guess it's possible, but
> I'd expect it to be relatively rare.
> 
> The one case I could see happening would be cases from the builtin apis...
> Of course one might call that user error ;-)
> 
> 
> jeff
Jeff Law July 31, 2023, 3:48 p.m. UTC | #6
On 7/31/23 06:14, Wang, Yanzhang wrote:
> Thanks your comments, Jeff and Robin
> 
>>> Is the mulh case somehow common or critical?
>> Well, I would actually back up even further.  What were the
>> circumstances that led to the mulh with a zero operand?
> 
> I think you both mentioned why should we add the mulh * 0 simplify.
> Unfortunately, I have no such a benchmark to explain the criticalness. We found
> there're some cases that exists in simplify_binary_operation in simplify-rtx.cc
> but not working for RISC-V backend. For example,
> 
> - mult * 0 exists, but RISC-V has additional mulh * 0
> - add + 0 / sub - 0 exists, but RISC-V has additional (madc + adc) + 0
> - ...
> 
> So we want to do some complement to make the simplify can cover more cases.
> That's the basic idea why we do these shortcut optimizations.
But the right place to handle this stuff is probably in the generic 
code, with a few exceptions.

So even if you don't have a benchmark, just having non-intrinsic/builtin 
code which triggers these cases would be helpful so that we can figure 
out the best place to fix this problem.  What I want to avoid is adding 
a bunch of patterns in the RISC-V backend for cases that are better 
handled by generic optimization passes.



Jeff
diff mbox series

Patch

diff --git a/gcc/config/riscv/autovec-opt.md b/gcc/config/riscv/autovec-opt.md
index 28040805b23..0d87572d1a4 100644
--- a/gcc/config/riscv/autovec-opt.md
+++ b/gcc/config/riscv/autovec-opt.md
@@ -405,3 +405,61 @@ 
   "vmv.x.s\t%0,%1"
   [(set_attr "type" "vimovvx")
    (set_attr "mode" "<MODE>")])
+
+;;; Simplify the mulh with 0 to move
+(define_split
+  [(set (match_operand:VI_QHS 0 "register_operand")
+     (if_then_else:VI_QHS
+       (unspec:<VM>
+	 [(match_operand:<VM> 1 "vector_all_trues_mask_operand")
+	   (match_operand 5 "vector_length_operand")
+	   (match_operand 6 "const_int_operand")
+	   (match_operand 7 "const_int_operand")
+	   (match_operand 8 "const_int_operand")
+	   (reg:SI VL_REGNUM)
+	   (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+       (unspec:VI_QHS
+	 [(vec_duplicate:VI_QHS
+	    (match_operand:<VEL> 4 "reg_or_0_operand"))
+	   (match_operand:VI_QHS 3 "register_operand")] VMULH)
+       (match_operand:VI_QHS 2 "vector_merge_operand")
+       ))]
+  "TARGET_VECTOR
+     && rtx_equal_p (operands[4], CONST0_RTX (GET_MODE (operands[4])))"
+  [(const_int 0)]
+{
+  riscv_vector::simplify_unspec_operations (operands, UNSPEC,
+					     <VMULH>, <MODE>mode) ;
+  DONE;
+})
+
+;;; Simplify vmadc + vadc with 0 to a simple move.
+(define_split
+  [(set (match_operand:VI 0 "register_operand")
+     (if_then_else:VI
+       (unspec:<VM>
+	 [(match_operand 4 "vector_length_operand")
+	   (match_operand 5 "const_int_operand")
+	   (match_operand 6 "const_int_operand")
+	   (reg:SI VL_REGNUM)
+	   (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+       (unspec:VI
+	 [(match_operand:VI 2 "register_operand")
+	   (unspec:<VM>
+	     [(match_operand:VI 3 "register_operand")
+	       (unspec:<VM>
+		 [(match_operand 7 "vector_length_operand")
+		   (match_operand 8 "const_int_operand")
+		   (reg:SI VL_REGNUM)
+		   (reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
+	       ] UNSPEC_OVERFLOW)
+	   ] UNSPEC_VADC)
+       (match_operand:VI 1 "vector_merge_operand")))]
+  "TARGET_VECTOR"
+  [(const_int 0)]
+{
+  riscv_vector::simplify_unspec_operations (operands, PLUS, UNSPEC_VADC,
+					    <MODE>mode);
+  DONE;
+})
+
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index f052757cede..6a188a3d0ef 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -228,6 +228,8 @@  bool neg_simm5_p (rtx);
 bool has_vi_variant_p (rtx_code, rtx);
 void expand_vec_cmp (rtx, rtx_code, rtx, rtx);
 bool expand_vec_cmp_float (rtx, rtx_code, rtx, rtx, bool);
+void simplify_complement (rtx *, rtx_code, machine_mode);
+void simplify_unspec_operations (rtx*, rtx_code, int, machine_mode);
 #endif
 bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
 			  bool, void (*)(rtx *, rtx));
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 839a2c6ba71..9a9428ce18d 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -2721,4 +2721,61 @@  expand_select_vl (rtx *ops)
   emit_insn (gen_no_side_effects_vsetvl_rtx (rvv_mode, ops[0], ops[1]));
 }
 
+void simplify_mulh (rtx *operands,
+		    machine_mode mode)
+{
+  rtx zero_operand = CONST0_RTX(GET_MODE(operands[4]));
+  if (rtx_equal_p(operands[4], zero_operand))
+    {
+      machine_mode mask_mode = riscv_vector::get_mask_mode (mode).require ();
+      emit_insn (gen_pred_mov (mode, operands[0], CONST1_RTX (mask_mode),
+			       RVV_VUNDEF (mode),
+			       CONST0_RTX (GET_MODE (operands[0])),
+			       operands[5], operands[6], operands[7],
+			       operands[8]));
+    }
+}
+
+void simplify_vadc (rtx *operands,
+		   machine_mode mode)
+{
+  machine_mode mask_mode = riscv_vector::get_mask_mode (mode).require ();
+
+  if (rtx_equal_p(operands[2], operands[3]))
+    {
+      emit_insn (gen_pred_mov (mode, operands[0], CONST1_RTX (mask_mode),
+			       operands[1], operands[2], operands[4],
+			       operands[5], operands[6],
+			       get_avl_type_rtx (riscv_vector::VLMAX)));
+    }
+}
+
+void simplify_unspec_operations (rtx *operands,
+				 rtx_code code,
+				 int unspec,
+				 machine_mode mode)
+{
+  switch (unspec)
+    {
+    case UNSPEC_VMULHS:
+    case UNSPEC_VMULHU:
+    case UNSPEC_VMULHSU:
+      simplify_mulh (operands, mode);
+      break;
+
+    case UNSPEC_VADC:
+    case UNSPEC_VSBC:
+      simplify_vadc(operands, mode);
+      break;
+
+    default:
+      break;
+    }
+}
+
+void simplify_complement (rtx *operands,
+			  rtx_code code,
+			  machine_mode mode)
+{
+}
 } // namespace riscv_vector
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
new file mode 100644
index 00000000000..6e4a3d62bc0
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vmulh-with-zero.cc
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+/* { dg-options "-march=rv64gcv -mabi=lp64 -O3 -Wno-psabi" } */
+
+#include "riscv_vector.h"
+
+#define VMULH_WITH_LMUL(X) \
+  vint32m##X##_t shortcut_for_riscv_vmulh_case_##X (vint32m##X##_t v1,\
+						     size_t vl) {      \
+    return __riscv_vmulh_vx_i32m ##X (v1, 0, vl);              	       \
+  }
+
+
+VMULH_WITH_LMUL (1)
+VMULH_WITH_LMUL (2)
+VMULH_WITH_LMUL (4)
+VMULH_WITH_LMUL (8)
+VMULH_WITH_LMUL (f2)
+
+/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 5} */
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
index 4d2de91bc14..d1473274137 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-121.c
@@ -50,6 +50,7 @@  void f6 (void * in, void *out, int32_t x)
     __riscv_vse64_v_i64m1 (out, v3, 4);
 }
 
-/* { dg-final { scan-assembler-times {vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } } */
+/* { dg-final { scan-assembler-times {vmv\.v\.i\sv[0-9]+,0} 1 } } */
+/* { dg-final { scan-assembler-times {vmulh\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 1 } } */
 /* { dg-final { scan-assembler-times {vdiv\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } } */
 /* { dg-final { scan-assembler-times {vrem\.vx\s+v[0-9]+,\s*v[0-9]+,zero} 2 } } */