Message ID | 20230322121556.94496-1-juzhe.zhong@rivai.ai |
---|---|
State | New |
Headers | show |
Series | RISC-V: Fix redundant vmv1r.v instruction in vmsge.vx codegen | expand |
LGTM, but pending this to the GCC 14 queue. On Wed, Mar 22, 2023 at 8:16 PM <juzhe.zhong@rivai.ai> wrote: > > From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai> > > Current expansion of vmsge will make RA produce redundant vmv1r.v. > > testcase: > void f1 (void * in, void *out, int32_t x) > { > vbool32_t mask = *(vbool32_t*)in; > asm volatile ("":::"memory"); > vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4); > vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4); > vbool32_t m3 = __riscv_vmsge_vx_i32m1_b32 (v, x, 4); > vbool32_t m4 = __riscv_vmsge_vx_i32m1_b32_mu (mask, m3, v, x, 4); > m4 = __riscv_vmsge_vv_i32m1_b32_m (m4, v2, v2, 4); > __riscv_vsm_v_b32 (out, m4, 4); > } > > Before this patch: > f1: > vsetvli a5,zero,e8,mf4,ta,ma > vlm.v v0,0(a0) > vsetivli zero,4,e32,m1,ta,mu > vle32.v v3,0(a0) > vle32.v v2,0(a0),v0.t > vmslt.vx v1,v3,a2 > vmnot.m v1,v1 > vmslt.vx v1,v3,a2,v0.t > vmxor.mm v1,v1,v0 > vmv1r.v v0,v1 > vmsge.vv v2,v2,v2,v0.t > vsm.v v2,0(a1) > ret > > After this patch: > f1: > vsetvli a5,zero,e8,mf4,ta,ma > vlm.v v0,0(a0) > vsetivli zero,4,e32,m1,ta,mu > vle32.v v3,0(a0) > vle32.v v2,0(a0),v0.t > vmslt.vx v1,v3,a2 > vmnot.m v1,v1 > vmslt.vx v1,v3,a2,v0.t > vmxor.mm v0,v1,v0 > vmsge.vv v2,v2,v2,v0.t > vsm.v v2,0(a1) > ret > > > gcc/ChangeLog: > > * config/riscv/vector.md: Fix redundant vmv1r.v. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/base/binop_vx_constraint-150.c: Adapt assembly check. 
> > --- > gcc/config/riscv/vector.md | 15 +++++++-------- > .../riscv/rvv/base/binop_vx_constraint-150.c | 2 +- > 2 files changed, 8 insertions(+), 9 deletions(-) > > diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md > index ebb014aecb1..f06d68be80f 100644 > --- a/gcc/config/riscv/vector.md > +++ b/gcc/config/riscv/vector.md > @@ -4111,6 +4111,7 @@ > { > enum rtx_code code = GET_CODE (operands[3]); > rtx undef = RVV_VUNDEF (<VM>mode); > + rtx tmp = gen_reg_rtx (<VM>mode); > if (code == GEU && rtx_equal_p (operands[5], const0_rtx)) > { > /* If vmsgeu with 0 immediate, expand it to vmset. */ > @@ -4157,12 +4158,11 @@ > - pseudoinstruction: vmsge{u}.vx vd, va, x > - expansion: vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd. */ > emit_insn ( > - gen_pred_cmp<mode>_scalar (operands[0], operands[1], operands[2], > + gen_pred_cmp<mode>_scalar (tmp, operands[1], operands[2], > operands[3], operands[4], operands[5], > operands[6], operands[7], operands[8])); > emit_insn (gen_pred_nand<vm> (operands[0], CONSTM1_RTX (<VM>mode), > - undef, operands[0], operands[0], > - operands[6], operands[8])); > + undef, tmp, tmp, operands[6], operands[8])); > } > else > { > @@ -4171,13 +4171,12 @@ > /* masked va >= x, vd == v0 > - pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt > - expansion: vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt. */ > - rtx reg = gen_reg_rtx (<VM>mode); > emit_insn (gen_pred_cmp<mode>_scalar ( > - reg, CONSTM1_RTX (<VM>mode), undef, operands[3], operands[4], > + tmp, CONSTM1_RTX (<VM>mode), undef, operands[3], operands[4], > operands[5], operands[6], operands[7], operands[8])); > emit_insn ( > gen_pred_andnot<vm> (operands[0], CONSTM1_RTX (<VM>mode), undef, > - operands[1], reg, operands[6], operands[8])); > + operands[1], tmp, operands[6], operands[8])); > } > else > { > @@ -4186,10 +4185,10 @@ > - expansion: vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0. 
> */ > emit_insn (gen_pred_cmp<mode>_scalar ( > - operands[0], operands[1], operands[2], operands[3], operands[4], > + tmp, operands[1], operands[2], operands[3], operands[4], > operands[5], operands[6], operands[7], operands[8])); > emit_insn (gen_pred (XOR, <VM>mode, operands[0], > - CONSTM1_RTX (<VM>mode), undef, operands[0], > + CONSTM1_RTX (<VM>mode), undef, tmp, > operands[1], operands[6], operands[8])); > } > } > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c > index 55a222f47ea..e92a8115f09 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c > @@ -18,4 +18,4 @@ void f1 (void * in, void *out, int32_t x) > /* { dg-final { scan-assembler-times {vmslt\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t} 1 } } */ > /* { dg-final { scan-assembler-times {vmxor\.mm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 1 } } */ > /* { dg-final { scan-assembler-times {vmnot\.m\s+v[0-9]+,\s*v[0-9]+} 1 } } */ > -/* { dg-final { scan-assembler-times {vmv} 1 } } */ > +/* { dg-final { scan-assembler-not {vmv} } } */ > -- > 2.36.1 >
On 3/22/23 06:15, juzhe.zhong@rivai.ai wrote: > From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai> > > Current expansion of vmsge will make RA produce redundant vmv1r.v. > > testcase: > void f1 (void * in, void *out, int32_t x) > { > vbool32_t mask = *(vbool32_t*)in; > asm volatile ("":::"memory"); > vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4); > vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4); > vbool32_t m3 = __riscv_vmsge_vx_i32m1_b32 (v, x, 4); > vbool32_t m4 = __riscv_vmsge_vx_i32m1_b32_mu (mask, m3, v, x, 4); > m4 = __riscv_vmsge_vv_i32m1_b32_m (m4, v2, v2, 4); > __riscv_vsm_v_b32 (out, m4, 4); > } > > Before this patch: > f1: > vsetvli a5,zero,e8,mf4,ta,ma > vlm.v v0,0(a0) > vsetivli zero,4,e32,m1,ta,mu > vle32.v v3,0(a0) > vle32.v v2,0(a0),v0.t > vmslt.vx v1,v3,a2 > vmnot.m v1,v1 > vmslt.vx v1,v3,a2,v0.t > vmxor.mm v1,v1,v0 > vmv1r.v v0,v1 > vmsge.vv v2,v2,v2,v0.t > vsm.v v2,0(a1) > ret > > After this patch: > f1: > vsetvli a5,zero,e8,mf4,ta,ma > vlm.v v0,0(a0) > vsetivli zero,4,e32,m1,ta,mu > vle32.v v3,0(a0) > vle32.v v2,0(a0),v0.t > vmslt.vx v1,v3,a2 > vmnot.m v1,v1 > vmslt.vx v1,v3,a2,v0.t > vmxor.mm v0,v1,v0 > vmsge.vv v2,v2,v2,v0.t > vsm.v v2,0(a1) > ret > > > gcc/ChangeLog: > > * config/riscv/vector.md: Fix redundant vmv1r.v. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/base/binop_vx_constraint-150.c: Adapt assembly check. OK. Please push this to the trunk. jeff
I can't push codes yet. Can you push them for me? juzhe.zhong@rivai.ai From: Jeff Law Date: 2023-04-22 04:42 To: juzhe.zhong; gcc-patches CC: kito.cheng; palmer Subject: Re: [PATCH] RISC-V: Fix redundant vmv1r.v instruction in vmsge.vx codegen On 3/22/23 06:15, juzhe.zhong@rivai.ai wrote: > From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai> > > Current expansion of vmsge will make RA produce redundant vmv1r.v. > > testcase: > void f1 (void * in, void *out, int32_t x) > { > vbool32_t mask = *(vbool32_t*)in; > asm volatile ("":::"memory"); > vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4); > vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4); > vbool32_t m3 = __riscv_vmsge_vx_i32m1_b32 (v, x, 4); > vbool32_t m4 = __riscv_vmsge_vx_i32m1_b32_mu (mask, m3, v, x, 4); > m4 = __riscv_vmsge_vv_i32m1_b32_m (m4, v2, v2, 4); > __riscv_vsm_v_b32 (out, m4, 4); > } > > Before this patch: > f1: > vsetvli a5,zero,e8,mf4,ta,ma > vlm.v v0,0(a0) > vsetivli zero,4,e32,m1,ta,mu > vle32.v v3,0(a0) > vle32.v v2,0(a0),v0.t > vmslt.vx v1,v3,a2 > vmnot.m v1,v1 > vmslt.vx v1,v3,a2,v0.t > vmxor.mm v1,v1,v0 > vmv1r.v v0,v1 > vmsge.vv v2,v2,v2,v0.t > vsm.v v2,0(a1) > ret > > After this patch: > f1: > vsetvli a5,zero,e8,mf4,ta,ma > vlm.v v0,0(a0) > vsetivli zero,4,e32,m1,ta,mu > vle32.v v3,0(a0) > vle32.v v2,0(a0),v0.t > vmslt.vx v1,v3,a2 > vmnot.m v1,v1 > vmslt.vx v1,v3,a2,v0.t > vmxor.mm v0,v1,v0 > vmsge.vv v2,v2,v2,v0.t > vsm.v v2,0(a1) > ret > > > gcc/ChangeLog: > > * config/riscv/vector.md: Fix redundant vmv1r.v. > > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/base/binop_vx_constraint-150.c: Adapt assembly check. OK. Please push this to the trunk. jeff
Committed to trunk On Mon, Apr 24, 2023 at 11:09 AM juzhe.zhong@rivai.ai <juzhe.zhong@rivai.ai> wrote: > > I can push codes yet. Can you push them for me? > > > > juzhe.zhong@rivai.ai > > From: Jeff Law > Date: 2023-04-22 04:42 > To: juzhe.zhong; gcc-patches > CC: kito.cheng; palmer > Subject: Re: [PATCH] RISC-V: Fix redundant vmv1r.v instruction in vmsge.vx codegen > > > On 3/22/23 06:15, juzhe.zhong@rivai.ai wrote: > > From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai> > > > > Current expansion of vmsge will make RA produce redundant vmv1r.v. > > > > testcase: > > void f1 (void * in, void *out, int32_t x) > > { > > vbool32_t mask = *(vbool32_t*)in; > > asm volatile ("":::"memory"); > > vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4); > > vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4); > > vbool32_t m3 = __riscv_vmsge_vx_i32m1_b32 (v, x, 4); > > vbool32_t m4 = __riscv_vmsge_vx_i32m1_b32_mu (mask, m3, v, x, 4); > > m4 = __riscv_vmsge_vv_i32m1_b32_m (m4, v2, v2, 4); > > __riscv_vsm_v_b32 (out, m4, 4); > > } > > > > Before this patch: > > f1: > > vsetvli a5,zero,e8,mf4,ta,ma > > vlm.v v0,0(a0) > > vsetivli zero,4,e32,m1,ta,mu > > vle32.v v3,0(a0) > > vle32.v v2,0(a0),v0.t > > vmslt.vx v1,v3,a2 > > vmnot.m v1,v1 > > vmslt.vx v1,v3,a2,v0.t > > vmxor.mm v1,v1,v0 > > vmv1r.v v0,v1 > > vmsge.vv v2,v2,v2,v0.t > > vsm.v v2,0(a1) > > ret > > > > After this patch: > > f1: > > vsetvli a5,zero,e8,mf4,ta,ma > > vlm.v v0,0(a0) > > vsetivli zero,4,e32,m1,ta,mu > > vle32.v v3,0(a0) > > vle32.v v2,0(a0),v0.t > > vmslt.vx v1,v3,a2 > > vmnot.m v1,v1 > > vmslt.vx v1,v3,a2,v0.t > > vmxor.mm v0,v1,v0 > > vmsge.vv v2,v2,v2,v0.t > > vsm.v v2,0(a1) > > ret > > > > > > gcc/ChangeLog: > > > > * config/riscv/vector.md: Fix redundant vmv1r.v. > > > > gcc/testsuite/ChangeLog: > > > > * gcc.target/riscv/rvv/base/binop_vx_constraint-150.c: Adapt assembly check. > OK. Please push this to the trunk. > > jeff >
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md index ebb014aecb1..f06d68be80f 100644 --- a/gcc/config/riscv/vector.md +++ b/gcc/config/riscv/vector.md @@ -4111,6 +4111,7 @@ { enum rtx_code code = GET_CODE (operands[3]); rtx undef = RVV_VUNDEF (<VM>mode); + rtx tmp = gen_reg_rtx (<VM>mode); if (code == GEU && rtx_equal_p (operands[5], const0_rtx)) { /* If vmsgeu with 0 immediate, expand it to vmset. */ @@ -4157,12 +4158,11 @@ - pseudoinstruction: vmsge{u}.vx vd, va, x - expansion: vmslt{u}.vx vd, va, x; vmnand.mm vd, vd, vd. */ emit_insn ( - gen_pred_cmp<mode>_scalar (operands[0], operands[1], operands[2], + gen_pred_cmp<mode>_scalar (tmp, operands[1], operands[2], operands[3], operands[4], operands[5], operands[6], operands[7], operands[8])); emit_insn (gen_pred_nand<vm> (operands[0], CONSTM1_RTX (<VM>mode), - undef, operands[0], operands[0], - operands[6], operands[8])); + undef, tmp, tmp, operands[6], operands[8])); } else { @@ -4171,13 +4171,12 @@ /* masked va >= x, vd == v0 - pseudoinstruction: vmsge{u}.vx vd, va, x, v0.t, vt - expansion: vmslt{u}.vx vt, va, x; vmandn.mm vd, vd, vt. */ - rtx reg = gen_reg_rtx (<VM>mode); emit_insn (gen_pred_cmp<mode>_scalar ( - reg, CONSTM1_RTX (<VM>mode), undef, operands[3], operands[4], + tmp, CONSTM1_RTX (<VM>mode), undef, operands[3], operands[4], operands[5], operands[6], operands[7], operands[8])); emit_insn ( gen_pred_andnot<vm> (operands[0], CONSTM1_RTX (<VM>mode), undef, - operands[1], reg, operands[6], operands[8])); + operands[1], tmp, operands[6], operands[8])); } else { @@ -4186,10 +4185,10 @@ - expansion: vmslt{u}.vx vd, va, x, v0.t; vmxor.mm vd, vd, v0. 
*/ emit_insn (gen_pred_cmp<mode>_scalar ( - operands[0], operands[1], operands[2], operands[3], operands[4], + tmp, operands[1], operands[2], operands[3], operands[4], operands[5], operands[6], operands[7], operands[8])); emit_insn (gen_pred (XOR, <VM>mode, operands[0], - CONSTM1_RTX (<VM>mode), undef, operands[0], + CONSTM1_RTX (<VM>mode), undef, tmp, operands[1], operands[6], operands[8])); } } diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c index 55a222f47ea..e92a8115f09 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/binop_vx_constraint-150.c @@ -18,4 +18,4 @@ void f1 (void * in, void *out, int32_t x) /* { dg-final { scan-assembler-times {vmslt\.vx\s+v[0-9]+,\s*v[0-9]+,\s*[a-x0-9]+,\s*v0.t} 1 } } */ /* { dg-final { scan-assembler-times {vmxor\.mm\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 1 } } */ /* { dg-final { scan-assembler-times {vmnot\.m\s+v[0-9]+,\s*v[0-9]+} 1 } } */ -/* { dg-final { scan-assembler-times {vmv} 1 } } */ +/* { dg-final { scan-assembler-not {vmv} } } */
From: Ju-Zhe Zhong <juzhe.zhong@rivai.ai> Current expansion of vmsge will make RA produce redundant vmv1r.v. testcase: void f1 (void * in, void *out, int32_t x) { vbool32_t mask = *(vbool32_t*)in; asm volatile ("":::"memory"); vint32m1_t v = __riscv_vle32_v_i32m1 (in, 4); vint32m1_t v2 = __riscv_vle32_v_i32m1_m (mask, in, 4); vbool32_t m3 = __riscv_vmsge_vx_i32m1_b32 (v, x, 4); vbool32_t m4 = __riscv_vmsge_vx_i32m1_b32_mu (mask, m3, v, x, 4); m4 = __riscv_vmsge_vv_i32m1_b32_m (m4, v2, v2, 4); __riscv_vsm_v_b32 (out, m4, 4); } Before this patch: f1: vsetvli a5,zero,e8,mf4,ta,ma vlm.v v0,0(a0) vsetivli zero,4,e32,m1,ta,mu vle32.v v3,0(a0) vle32.v v2,0(a0),v0.t vmslt.vx v1,v3,a2 vmnot.m v1,v1 vmslt.vx v1,v3,a2,v0.t vmxor.mm v1,v1,v0 vmv1r.v v0,v1 vmsge.vv v2,v2,v2,v0.t vsm.v v2,0(a1) ret After this patch: f1: vsetvli a5,zero,e8,mf4,ta,ma vlm.v v0,0(a0) vsetivli zero,4,e32,m1,ta,mu vle32.v v3,0(a0) vle32.v v2,0(a0),v0.t vmslt.vx v1,v3,a2 vmnot.m v1,v1 vmslt.vx v1,v3,a2,v0.t vmxor.mm v0,v1,v0 vmsge.vv v2,v2,v2,v0.t vsm.v v2,0(a1) ret gcc/ChangeLog: * config/riscv/vector.md: Fix redundant vmv1r.v. gcc/testsuite/ChangeLog: * gcc.target/riscv/rvv/base/binop_vx_constraint-150.c: Adapt assembly check. --- gcc/config/riscv/vector.md | 15 +++++++-------- .../riscv/rvv/base/binop_vx_constraint-150.c | 2 +- 2 files changed, 8 insertions(+), 9 deletions(-)