Message ID | 20230908085419.494384-1-lehua.ding@rivai.ai |
---|---|
State | New |
Headers | show |
Series | [V3] Support folding min(poly,poly) to const | expand |
Lehua Ding <lehua.ding@rivai.ai> writes: > V3 change: Address Richard's comments. > > Hi, > > This patch adds support that tries to fold `MIN (poly, poly)` to > a constant. Consider the following C Code: > > ``` > void foo2 (int* restrict a, int* restrict b, int n) > { > for (int i = 0; i < 3; i += 1) > a[i] += b[i]; > } > ``` > > Before this patch: > > ``` > void foo2 (int * restrict a, int * restrict b, int n) > { > vector([4,4]) int vect__7.27; > vector([4,4]) int vect__6.26; > vector([4,4]) int vect__4.23; > unsigned long _32; > > <bb 2> [local count: 268435456]: > _32 = MIN_EXPR <3, POLY_INT_CST [4, 4]>; > vect__4.23_20 = .MASK_LEN_LOAD (a_11(D), 32B, { -1, ... }, _32, 0); > vect__6.26_15 = .MASK_LEN_LOAD (b_12(D), 32B, { -1, ... }, _32, 0); > vect__7.27_9 = vect__6.26_15 + vect__4.23_20; > .MASK_LEN_STORE (a_11(D), 32B, { -1, ... }, _32, 0, vect__7.27_9); [tail call] > return; > > } > ``` > > After this patch: > > ``` > void foo2 (int * restrict a, int * restrict b, int n) > { > vector([4,4]) int vect__7.27; > vector([4,4]) int vect__6.26; > vector([4,4]) int vect__4.23; > > <bb 2> [local count: 268435456]: > vect__4.23_20 = .MASK_LEN_LOAD (a_11(D), 32B, { -1, ... }, 3, 0); > vect__6.26_15 = .MASK_LEN_LOAD (b_12(D), 32B, { -1, ... }, 3, 0); > vect__7.27_9 = vect__6.26_15 + vect__4.23_20; > .MASK_LEN_STORE (a_11(D), 32B, { -1, ... }, 3, 0, vect__7.27_9); [tail call] > return; > > } > ``` > > For RISC-V RVV, csrr and branch instructions can be reduced: > > Before this patch: > > ``` > foo2: > csrr a4,vlenb > srli a4,a4,2 > li a5,3 > bleu a5,a4,.L5 > mv a5,a4 > .L5: > vsetvli zero,a5,e32,m1,ta,ma > ... > ``` > > After this patch. > > ``` > foo2: > vsetivli zero,3,e32,m1,ta,ma > ... > ``` > > Best, > Lehua > > gcc/ChangeLog: > > * fold-const.cc (can_min_p): New function. > (poly_int_binop): Try fold MIN_EXPR. OK, thanks. Richard > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/autovec/vls/div-1.c: Adjust. > * gcc.target/riscv/rvv/autovec/vls/shift-3.c: Adjust. > * gcc.target/riscv/rvv/autovec/fold-min-poly.c: New test. > > --- > gcc/fold-const.cc | 24 +++++++++++++++++++ > .../riscv/rvv/autovec/fold-min-poly.c | 24 +++++++++++++++++++ > .../gcc.target/riscv/rvv/autovec/vls/div-1.c | 2 +- > .../riscv/rvv/autovec/vls/shift-3.c | 2 +- > 4 files changed, 50 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c > > diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc > index 1da498a3152..d19b4666c65 100644 > --- a/gcc/fold-const.cc > +++ b/gcc/fold-const.cc > @@ -1213,6 +1213,25 @@ wide_int_binop (wide_int &res, > return true; > } > > +/* Returns true if we know who is smaller or equal, ARG1 or ARG2, and set the > + min value to RES. */ > +bool > +can_min_p (const_tree arg1, const_tree arg2, poly_wide_int &res) > +{ > + if (known_le (wi::to_poly_widest (arg1), wi::to_poly_widest (arg2))) > + { > + res = wi::to_poly_wide (arg1); > + return true; > + } > + else if (known_le (wi::to_poly_widest (arg2), wi::to_poly_widest (arg1))) > + { > + res = wi::to_poly_wide (arg2); > + return true; > + } > + > + return false; > +} > + > /* Combine two poly int's ARG1 and ARG2 under operation CODE to > produce a new constant in RES. Return FALSE if we don't know how > to evaluate CODE at compile-time. */ > @@ -1261,6 +1280,11 @@ poly_int_binop (poly_wide_int &res, enum tree_code code, > return false; > break; > > + case MIN_EXPR: > + if (!can_min_p (arg1, arg2, res)) > + return false; > + break; > + > default: > return false; > } > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c > new file mode 100644 > index 00000000000..de4c472c76e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c > @@ -0,0 +1,24 @@ > +/* { dg-do compile } */ > +/* { dg-options " -march=rv64gcv_zvl128b -mabi=lp64d -O3 --param riscv-autovec-preference=scalable --param riscv-autovec-lmul=m1 -fno-vect-cost-model" } */ > + > +void foo1 (int* restrict a, int* restrict b, int n) > +{ > + for (int i = 0; i < 4; i += 1) > + a[i] += b[i]; > +} > + > +void foo2 (int* restrict a, int* restrict b, int n) > +{ > + for (int i = 0; i < 3; i += 1) > + a[i] += b[i]; > +} > + > +void foo3 (int* restrict a, int* restrict b, int n) > +{ > + for (int i = 0; i < 5; i += 1) > + a[i] += b[i]; > +} > + > +/* { dg-final { scan-assembler-not {\tcsrr\t} } } */ > +/* { dg-final { scan-assembler {\tvsetivli\tzero,4,e32,m1,t[au],m[au]} } } */ > +/* { dg-final { scan-assembler {\tvsetivli\tzero,3,e32,m1,t[au],m[au]} } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c > index f3388a86e38..40224c69458 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c > @@ -55,4 +55,4 @@ DEF_OP_VV (div, 512, int64_t, /) > > /* { dg-final { scan-assembler-times {vdivu?\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 42 } } */ > /* TODO: Ideally, we should make sure there is no "csrr vlenb". However, we still have 'csrr vlenb' for some cases since we don't support VLS mode conversion which are needed by division. */ > -/* { dg-final { scan-assembler-times {csrr} 19 } } */ > +/* { dg-final { scan-assembler-not {csrr} } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c > index 98822b15657..b34a349949b 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c > @@ -55,4 +55,4 @@ DEF_OP_VV (shift, 512, int64_t, <<) > > /* { dg-final { scan-assembler-times {vsll\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 41 } } */ > /* TODO: Ideally, we should make sure there is no "csrr vlenb". However, we still have 'csrr vlenb' for some cases since we don't support VLS mode conversion which are needed by division. */ > -/* { dg-final { scan-assembler-times {csrr} 18 } } */ > +/* { dg-final { scan-assembler-not {csrr} } } */
Thanks Richard. LGTM again from RISC-V side :). juzhe.zhong@rivai.ai From: Richard Sandiford Date: 2023-09-08 16:56 To: Lehua Ding CC: gcc-patches; juzhe.zhong Subject: Re: [PATCH V3] Support folding min(poly,poly) to const Lehua Ding <lehua.ding@rivai.ai> writes: > V3 change: Address Richard's comments. > > Hi, > > This patch adds support that tries to fold `MIN (poly, poly)` to > a constant. Consider the following C Code: > > ``` > void foo2 (int* restrict a, int* restrict b, int n) > { > for (int i = 0; i < 3; i += 1) > a[i] += b[i]; > } > ``` > > Before this patch: > > ``` > void foo2 (int * restrict a, int * restrict b, int n) > { > vector([4,4]) int vect__7.27; > vector([4,4]) int vect__6.26; > vector([4,4]) int vect__4.23; > unsigned long _32; > > <bb 2> [local count: 268435456]: > _32 = MIN_EXPR <3, POLY_INT_CST [4, 4]>; > vect__4.23_20 = .MASK_LEN_LOAD (a_11(D), 32B, { -1, ... }, _32, 0); > vect__6.26_15 = .MASK_LEN_LOAD (b_12(D), 32B, { -1, ... }, _32, 0); > vect__7.27_9 = vect__6.26_15 + vect__4.23_20; > .MASK_LEN_STORE (a_11(D), 32B, { -1, ... }, _32, 0, vect__7.27_9); [tail call] > return; > > } > ``` > > After this patch: > > ``` > void foo2 (int * restrict a, int * restrict b, int n) > { > vector([4,4]) int vect__7.27; > vector([4,4]) int vect__6.26; > vector([4,4]) int vect__4.23; > > <bb 2> [local count: 268435456]: > vect__4.23_20 = .MASK_LEN_LOAD (a_11(D), 32B, { -1, ... }, 3, 0); > vect__6.26_15 = .MASK_LEN_LOAD (b_12(D), 32B, { -1, ... }, 3, 0); > vect__7.27_9 = vect__6.26_15 + vect__4.23_20; > .MASK_LEN_STORE (a_11(D), 32B, { -1, ... }, 3, 0, vect__7.27_9); [tail call] > return; > > } > ``` > > For RISC-V RVV, csrr and branch instructions can be reduced: > > Before this patch: > > ``` > foo2: > csrr a4,vlenb > srli a4,a4,2 > li a5,3 > bleu a5,a4,.L5 > mv a5,a4 > .L5: > vsetvli zero,a5,e32,m1,ta,ma > ... > ``` > > After this patch. > > ``` > foo2: > vsetivli zero,3,e32,m1,ta,ma > ... > ``` > > Best, > Lehua > > gcc/ChangeLog: > > * fold-const.cc (can_min_p): New function. > (poly_int_binop): Try fold MIN_EXPR. OK, thanks. Richard > gcc/testsuite/ChangeLog: > > * gcc.target/riscv/rvv/autovec/vls/div-1.c: Adjust. > * gcc.target/riscv/rvv/autovec/vls/shift-3.c: Adjust. > * gcc.target/riscv/rvv/autovec/fold-min-poly.c: New test. > > --- > gcc/fold-const.cc | 24 +++++++++++++++++++ > .../riscv/rvv/autovec/fold-min-poly.c | 24 +++++++++++++++++++ > .../gcc.target/riscv/rvv/autovec/vls/div-1.c | 2 +- > .../riscv/rvv/autovec/vls/shift-3.c | 2 +- > 4 files changed, 50 insertions(+), 2 deletions(-) > create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c > > diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc > index 1da498a3152..d19b4666c65 100644 > --- a/gcc/fold-const.cc > +++ b/gcc/fold-const.cc > @@ -1213,6 +1213,25 @@ wide_int_binop (wide_int &res, > return true; > } > > +/* Returns true if we know who is smaller or equal, ARG1 or ARG2, and set the > + min value to RES. */ > +bool > +can_min_p (const_tree arg1, const_tree arg2, poly_wide_int &res) > +{ > + if (known_le (wi::to_poly_widest (arg1), wi::to_poly_widest (arg2))) > + { > + res = wi::to_poly_wide (arg1); > + return true; > + } > + else if (known_le (wi::to_poly_widest (arg2), wi::to_poly_widest (arg1))) > + { > + res = wi::to_poly_wide (arg2); > + return true; > + } > + > + return false; > +} > + > /* Combine two poly int's ARG1 and ARG2 under operation CODE to > produce a new constant in RES. Return FALSE if we don't know how > to evaluate CODE at compile-time. */ > @@ -1261,6 +1280,11 @@ poly_int_binop (poly_wide_int &res, enum tree_code code, > return false; > break; > > + case MIN_EXPR: > + if (!can_min_p (arg1, arg2, res)) > + return false; > + break; > + > default: > return false; > } > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c > new file mode 100644 > index 00000000000..de4c472c76e > --- /dev/null > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c > @@ -0,0 +1,24 @@ > +/* { dg-do compile } */ > +/* { dg-options " -march=rv64gcv_zvl128b -mabi=lp64d -O3 --param riscv-autovec-preference=scalable --param riscv-autovec-lmul=m1 -fno-vect-cost-model" } */ > + > +void foo1 (int* restrict a, int* restrict b, int n) > +{ > + for (int i = 0; i < 4; i += 1) > + a[i] += b[i]; > +} > + > +void foo2 (int* restrict a, int* restrict b, int n) > +{ > + for (int i = 0; i < 3; i += 1) > + a[i] += b[i]; > +} > + > +void foo3 (int* restrict a, int* restrict b, int n) > +{ > + for (int i = 0; i < 5; i += 1) > + a[i] += b[i]; > +} > + > +/* { dg-final { scan-assembler-not {\tcsrr\t} } } */ > +/* { dg-final { scan-assembler {\tvsetivli\tzero,4,e32,m1,t[au],m[au]} } } */ > +/* { dg-final { scan-assembler {\tvsetivli\tzero,3,e32,m1,t[au],m[au]} } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c > index f3388a86e38..40224c69458 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c > @@ -55,4 +55,4 @@ DEF_OP_VV (div, 512, int64_t, /) > > /* { dg-final { scan-assembler-times {vdivu?\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 42 } } */ > /* TODO: Ideally, we should make sure there is no "csrr vlenb". However, we still have 'csrr vlenb' for some cases since we don't support VLS mode conversion which are needed by division. */ > -/* { dg-final { scan-assembler-times {csrr} 19 } } */ > +/* { dg-final { scan-assembler-not {csrr} } } */ > diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c > index 98822b15657..b34a349949b 100644 > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c > @@ -55,4 +55,4 @@ DEF_OP_VV (shift, 512, int64_t, <<) > > /* { dg-final { scan-assembler-times {vsll\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 41 } } */ > /* TODO: Ideally, we should make sure there is no "csrr vlenb". However, we still have 'csrr vlenb' for some cases since we don't support VLS mode conversion which are needed by division. */ > -/* { dg-final { scan-assembler-times {csrr} 18 } } */ > +/* { dg-final { scan-assembler-not {csrr} } } */
Committed, thanks Richard and Juzhe. On 2023/9/8 16:57, 钟居哲 wrote: > Thanks Richard. > LGTM again from RISC-V side :). > > ------------------------------------------------------------------------ > juzhe.zhong@rivai.ai > > *From:* Richard Sandiford <mailto:richard.sandiford@arm.com> > *Date:* 2023-09-08 16:56 > *To:* Lehua Ding <mailto:lehua.ding@rivai.ai> > *CC:* gcc-patches <mailto:gcc-patches@gcc.gnu.org>; juzhe.zhong > <mailto:juzhe.zhong@rivai.ai> > *Subject:* Re: [PATCH V3] Support folding min(poly,poly) to const > Lehua Ding <lehua.ding@rivai.ai> writes: > > V3 change: Address Richard's comments. > > > > Hi, > > > > This patch adds support that tries to fold `MIN (poly, poly)` to > > a constant. Consider the following C Code: > > > > ``` > > void foo2 (int* restrict a, int* restrict b, int n) > > { > > for (int i = 0; i < 3; i += 1) > > a[i] += b[i]; > > } > > ``` > > > > Before this patch: > > > > ``` > > void foo2 (int * restrict a, int * restrict b, int n) > > { > > vector([4,4]) int vect__7.27; > > vector([4,4]) int vect__6.26; > > vector([4,4]) int vect__4.23; > > unsigned long _32; > > > > <bb 2> [local count: 268435456]: > > _32 = MIN_EXPR <3, POLY_INT_CST [4, 4]>; > > vect__4.23_20 = .MASK_LEN_LOAD (a_11(D), 32B, { -1, ... }, _32, 0); > > vect__6.26_15 = .MASK_LEN_LOAD (b_12(D), 32B, { -1, ... }, _32, 0); > > vect__7.27_9 = vect__6.26_15 + vect__4.23_20; > > .MASK_LEN_STORE (a_11(D), 32B, { -1, ... }, _32, 0, > vect__7.27_9); [tail call] > > return; > > > > } > > ``` > > > > After this patch: > > > > ``` > > void foo2 (int * restrict a, int * restrict b, int n) > > { > > vector([4,4]) int vect__7.27; > > vector([4,4]) int vect__6.26; > > vector([4,4]) int vect__4.23; > > > > <bb 2> [local count: 268435456]: > > vect__4.23_20 = .MASK_LEN_LOAD (a_11(D), 32B, { -1, ... }, 3, 0); > > vect__6.26_15 = .MASK_LEN_LOAD (b_12(D), 32B, { -1, ... }, 3, 0); > > vect__7.27_9 = vect__6.26_15 + vect__4.23_20; > > .MASK_LEN_STORE (a_11(D), 32B, { -1, ... }, 3, 0, > vect__7.27_9); [tail call] > > return; > > > > } > > ``` > > > > For RISC-V RVV, csrr and branch instructions can be reduced: > > > > Before this patch: > > > > ``` > > foo2: > > csrr a4,vlenb > > srli a4,a4,2 > > li a5,3 > > bleu a5,a4,.L5 > > mv a5,a4 > > .L5: > > vsetvli zero,a5,e32,m1,ta,ma > > ... > > ``` > > > > After this patch. > > > > ``` > > foo2: > > vsetivli zero,3,e32,m1,ta,ma > > ... > > ``` > > > > Best, > > Lehua > > > > gcc/ChangeLog: > > > > * fold-const.cc (can_min_p): New function. > > (poly_int_binop): Try fold MIN_EXPR. > OK, thanks. > Richard > > gcc/testsuite/ChangeLog: > > > > * gcc.target/riscv/rvv/autovec/vls/div-1.c: Adjust. > > * gcc.target/riscv/rvv/autovec/vls/shift-3.c: Adjust. > > * gcc.target/riscv/rvv/autovec/fold-min-poly.c: New test. > > > > --- > > gcc/fold-const.cc | 24 > +++++++++++++++++++ > > .../riscv/rvv/autovec/fold-min-poly.c | 24 > +++++++++++++++++++ > > .../gcc.target/riscv/rvv/autovec/vls/div-1.c | 2 +- > > .../riscv/rvv/autovec/vls/shift-3.c | 2 +- > > 4 files changed, 50 insertions(+), 2 deletions(-) > > create mode 100644 > gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c > > > > diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc > > index 1da498a3152..d19b4666c65 100644 > > --- a/gcc/fold-const.cc > > +++ b/gcc/fold-const.cc > > @@ -1213,6 +1213,25 @@ wide_int_binop (wide_int &res, > > return true; > > } > > > > +/* Returns true if we know who is smaller or equal, ARG1 or > ARG2, and set the > > + min value to RES. */ > > +bool > > +can_min_p (const_tree arg1, const_tree arg2, poly_wide_int &res) > > +{ > > + if (known_le (wi::to_poly_widest (arg1), wi::to_poly_widest > (arg2))) > > + { > > + res = wi::to_poly_wide (arg1); > > + return true; > > + } > > + else if (known_le (wi::to_poly_widest (arg2), > wi::to_poly_widest (arg1))) > > + { > > + res = wi::to_poly_wide (arg2); > > + return true; > > + } > > + > > + return false; > > +} > > + > > /* Combine two poly int's ARG1 and ARG2 under operation CODE to > > produce a new constant in RES. Return FALSE if we don't know how > > to evaluate CODE at compile-time. */ > > @@ -1261,6 +1280,11 @@ poly_int_binop (poly_wide_int &res, enum > tree_code code, > > return false; > > break; > > > > + case MIN_EXPR: > > + if (!can_min_p (arg1, arg2, res)) > > + return false; > > + break; > > + > > default: > > return false; > > } > > diff --git > a/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c > > new file mode 100644 > > index 00000000000..de4c472c76e > > --- /dev/null > > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c > > @@ -0,0 +1,24 @@ > > +/* { dg-do compile } */ > > +/* { dg-options " -march=rv64gcv_zvl128b -mabi=lp64d -O3 --param > riscv-autovec-preference=scalable --param riscv-autovec-lmul=m1 > -fno-vect-cost-model" } */ > > + > > +void foo1 (int* restrict a, int* restrict b, int n) > > +{ > > + for (int i = 0; i < 4; i += 1) > > + a[i] += b[i]; > > +} > > + > > +void foo2 (int* restrict a, int* restrict b, int n) > > +{ > > + for (int i = 0; i < 3; i += 1) > > + a[i] += b[i]; > > +} > > + > > +void foo3 (int* restrict a, int* restrict b, int n) > > +{ > > + for (int i = 0; i < 5; i += 1) > > + a[i] += b[i]; > > +} > > + > > +/* { dg-final { scan-assembler-not {\tcsrr\t} } } */ > > +/* { dg-final { scan-assembler > {\tvsetivli\tzero,4,e32,m1,t[au],m[au]} } } */ > > +/* { dg-final { scan-assembler > {\tvsetivli\tzero,3,e32,m1,t[au],m[au]} } } */ > > diff --git > a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c > > index f3388a86e38..40224c69458 100644 > > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c > > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c > > @@ -55,4 +55,4 @@ DEF_OP_VV (div, 512, int64_t, /) > > > > /* { dg-final { scan-assembler-times > {vdivu?\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 42 } } */ > > /* TODO: Ideally, we should make sure there is no "csrr vlenb". > However, we still have 'csrr vlenb' for some cases since we don't > support VLS mode conversion which are needed by division. */ > > -/* { dg-final { scan-assembler-times {csrr} 19 } } */ > > +/* { dg-final { scan-assembler-not {csrr} } } */ > > diff --git > a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c > b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c > > index 98822b15657..b34a349949b 100644 > > --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c > > +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c > > @@ -55,4 +55,4 @@ DEF_OP_VV (shift, 512, int64_t, <<) > > > > /* { dg-final { scan-assembler-times > {vsll\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 41 } } */ > > /* TODO: Ideally, we should make sure there is no "csrr vlenb". > However, we still have 'csrr vlenb' for some cases since we don't > support VLS mode conversion which are needed by division. */ > > -/* { dg-final { scan-assembler-times {csrr} 18 } } */ > > +/* { dg-final { scan-assembler-not {csrr} } } */ >
diff --git a/gcc/fold-const.cc b/gcc/fold-const.cc index 1da498a3152..d19b4666c65 100644 --- a/gcc/fold-const.cc +++ b/gcc/fold-const.cc @@ -1213,6 +1213,25 @@ wide_int_binop (wide_int &res, return true; } +/* Returns true if we know who is smaller or equal, ARG1 or ARG2, and set the + min value to RES. */ +bool +can_min_p (const_tree arg1, const_tree arg2, poly_wide_int &res) +{ + if (known_le (wi::to_poly_widest (arg1), wi::to_poly_widest (arg2))) + { + res = wi::to_poly_wide (arg1); + return true; + } + else if (known_le (wi::to_poly_widest (arg2), wi::to_poly_widest (arg1))) + { + res = wi::to_poly_wide (arg2); + return true; + } + + return false; +} + /* Combine two poly int's ARG1 and ARG2 under operation CODE to produce a new constant in RES. Return FALSE if we don't know how to evaluate CODE at compile-time. */ @@ -1261,6 +1280,11 @@ poly_int_binop (poly_wide_int &res, enum tree_code code, return false; break; + case MIN_EXPR: + if (!can_min_p (arg1, arg2, res)) + return false; + break; + default: return false; } diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c new file mode 100644 index 00000000000..de4c472c76e --- /dev/null +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/fold-min-poly.c @@ -0,0 +1,24 @@ +/* { dg-do compile } */ +/* { dg-options " -march=rv64gcv_zvl128b -mabi=lp64d -O3 --param riscv-autovec-preference=scalable --param riscv-autovec-lmul=m1 -fno-vect-cost-model" } */ + +void foo1 (int* restrict a, int* restrict b, int n) +{ + for (int i = 0; i < 4; i += 1) + a[i] += b[i]; +} + +void foo2 (int* restrict a, int* restrict b, int n) +{ + for (int i = 0; i < 3; i += 1) + a[i] += b[i]; +} + +void foo3 (int* restrict a, int* restrict b, int n) +{ + for (int i = 0; i < 5; i += 1) + a[i] += b[i]; +} + +/* { dg-final { scan-assembler-not {\tcsrr\t} } } */ +/* { dg-final { scan-assembler {\tvsetivli\tzero,4,e32,m1,t[au],m[au]} } } */ +/* { dg-final { scan-assembler {\tvsetivli\tzero,3,e32,m1,t[au],m[au]} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c index f3388a86e38..40224c69458 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/div-1.c @@ -55,4 +55,4 @@ DEF_OP_VV (div, 512, int64_t, /) /* { dg-final { scan-assembler-times {vdivu?\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 42 } } */ /* TODO: Ideally, we should make sure there is no "csrr vlenb". However, we still have 'csrr vlenb' for some cases since we don't support VLS mode conversion which are needed by division. */ -/* { dg-final { scan-assembler-times {csrr} 19 } } */ +/* { dg-final { scan-assembler-not {csrr} } } */ diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c index 98822b15657..b34a349949b 100644 --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/vls/shift-3.c @@ -55,4 +55,4 @@ DEF_OP_VV (shift, 512, int64_t, <<) /* { dg-final { scan-assembler-times {vsll\.vv\s+v[0-9]+,\s*v[0-9]+,\s*v[0-9]+} 41 } } */ /* TODO: Ideally, we should make sure there is no "csrr vlenb". However, we still have 'csrr vlenb' for some cases since we don't support VLS mode conversion which are needed by division. */ -/* { dg-final { scan-assembler-times {csrr} 18 } } */ +/* { dg-final { scan-assembler-not {csrr} } } */