Message ID | 81360e11-d013-4443-93ca-d76598b957e2@linux.ibm.com |
---|---|
State | New |
Headers | show |
Series | [v2] Add handling of MULT_EXPR/PLUS_EXPR for wrapping overflow in affine combination(PR83403) | expand |
On Thu, 30 Apr 2020, luoxhu wrote: > Update the patch with overflow check. Bootstrap and regression tested PASS on Power8-LE. > > > Use determine_value_range to get value range info for fold convert expressions > with internal operation PLUS_EXPR/MINUS_EXPR/MULT_EXPR when not overflow on > wrapping overflow inner type. i.e.: > > (long unsigned int)((unsigned int)n * 10 + 1) > => > (long unsigned int)n * (long unsigned int)10 + (long unsigned int)1 > > With this patch for affine combination, load/store motion could detect > more address refs independency and promote some memory expressions to > registers within loop. > > PS: Replace the previous "(T1)(X + CST) as (T1)X - (T1)(-CST))" > to "(T1)(X + CST) as (T1)X + (T1)(CST))" for wrapping overflow. This is OK for trunk if bootstrapped / tested properl. Thanks, Richard. > gcc/ChangeLog > > 2020-04-30 Xiong Hu Luo <luoxhu@linux.ibm.com> > > PR tree-optimization/83403 > * tree-affine.c (expr_to_aff_combination): Replace SSA_NAME with > determine_value_range, Add fold conversion of MULT_EXPR, fix the > previous PLUS_EXPR. > > gcc/testsuite/ChangeLog > > 2020-04-30 Xiong Hu Luo <luoxhu@linux.ibm.com> > > PR tree-optimization/83403 > * gcc.dg/tree-ssa/pr83403-1.c: New test. > * gcc.dg/tree-ssa/pr83403-2.c: New test. > * gcc.dg/tree-ssa/pr83403.h: New header. > --- > gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c | 8 ++++++ > gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c | 8 ++++++ > gcc/testsuite/gcc.dg/tree-ssa/pr83403.h | 30 +++++++++++++++++++++++ > gcc/tree-affine.c | 24 ++++++++++-------- > 4 files changed, 60 insertions(+), 10 deletions(-) > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c > create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403.h > > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c > new file mode 100644 > index 00000000000..748375b03af > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c > @@ -0,0 +1,8 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */ > + > +#define TYPE unsigned int > + > +#include "pr83403.h" > + > +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c > new file mode 100644 > index 00000000000..ca2e6bbd61c > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c > @@ -0,0 +1,8 @@ > +/* { dg-do compile } */ > +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */ > + > +#define TYPE int > + > +#include "pr83403.h" > + > +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */ > diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h > new file mode 100644 > index 00000000000..0da8a835b5f > --- /dev/null > +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h > @@ -0,0 +1,30 @@ > +__attribute__ ((noinline)) void > +calculate (const double *__restrict__ A, const double *__restrict__ B, > + double *__restrict__ C) > +{ > + TYPE m = 0; > + TYPE n = 0; > + TYPE k = 0; > + > + A = (const double *) __builtin_assume_aligned (A, 16); > + B = (const double *) __builtin_assume_aligned (B, 16); > + C = (double *) __builtin_assume_aligned (C, 16); > + > + for (n = 0; n < 9; n++) > + { > + for (m = 0; m < 10; m++) > + { > + C[(n * 10) + m] = 0.0; > + } > + > + for (k = 0; k < 17; k++) > + { > +#pragma simd > + for (m = 0; m < 10; m++) > + { > + C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k]; > + } > + } > + } > +} > + > diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c > index 0eb8db1b086..5620e6bf28f 100644 > --- a/gcc/tree-affine.c > +++ b/gcc/tree-affine.c > @@ -343,24 +343,28 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type, > wide_int minv, maxv; > /* If inner type has wrapping overflow behavior, fold conversion > for below case: > - (T1)(X - CST) -> (T1)X - (T1)CST > - if X - CST doesn't overflow by range information. Also handle > - (T1)(X + CST) as (T1)(X - (-CST)). */ > + (T1)(X *+- CST) -> (T1)X *+- (T1)CST > + if X *+- CST doesn't overflow by range information. */ > if (TYPE_UNSIGNED (itype) > && TYPE_OVERFLOW_WRAPS (itype) > - && TREE_CODE (op0) == SSA_NAME > && TREE_CODE (op1) == INTEGER_CST > - && icode != MULT_EXPR > - && get_range_info (op0, &minv, &maxv) == VR_RANGE) > + && determine_value_range (op0, &minv, &maxv) == VR_RANGE) > { > + wi::overflow_type overflow = wi::OVF_NONE; > + signop sign = UNSIGNED; > if (icode == PLUS_EXPR) > - op1 = wide_int_to_tree (itype, -wi::to_wide (op1)); > - if (wi::geu_p (minv, wi::to_wide (op1))) > + wi::add (maxv, wi::to_wide (op1), sign, &overflow); > + else if (icode == MULT_EXPR) > + wi::mul (maxv, wi::to_wide (op1), sign, &overflow); > + else > + wi::sub (minv, wi::to_wide (op1), sign, &overflow); > + > + if (overflow == wi::OVF_NONE) > { > op0 = fold_convert (otype, op0); > op1 = fold_convert (otype, op1); > - return expr_to_aff_combination (comb, MINUS_EXPR, otype, > - op0, op1); > + return expr_to_aff_combination (comb, icode, otype, op0, > + op1); > }1 > } > } >
在 2020-05-06 20:09,Richard Biener 写道: > On Thu, 30 Apr 2020, luoxhu wrote: > >> Update the patch with overflow check. Bootstrap and regression tested >> PASS on Power8-LE. >> >> >> Use determine_value_range to get value range info for fold convert >> expressions >> with internal operation PLUS_EXPR/MINUS_EXPR/MULT_EXPR when not >> overflow on >> wrapping overflow inner type. i.e.: >> >> (long unsigned int)((unsigned int)n * 10 + 1) >> => >> (long unsigned int)n * (long unsigned int)10 + (long unsigned int)1 >> >> With this patch for affine combination, load/store motion could detect >> more address refs independency and promote some memory expressions to >> registers within loop. >> >> PS: Replace the previous "(T1)(X + CST) as (T1)X - (T1)(-CST))" >> to "(T1)(X + CST) as (T1)X + (T1)(CST))" for wrapping overflow. > > This is OK for trunk if bootstrapped / tested properl. Bootstrap and regression tested pass on power8LE, committed to r11-259-g0447929f11e6a3e1b076841712b90a8b6bc7d33a, is it necessary to backport it to gcc-10&gcc-9? Thanks, Xionghu > > Thanks, > Richard. > >> gcc/ChangeLog >> >> 2020-04-30 Xiong Hu Luo <luoxhu@linux.ibm.com> >> >> PR tree-optimization/83403 >> * tree-affine.c (expr_to_aff_combination): Replace SSA_NAME with >> determine_value_range, Add fold conversion of MULT_EXPR, fix the >> previous PLUS_EXPR. >> >> gcc/testsuite/ChangeLog >> >> 2020-04-30 Xiong Hu Luo <luoxhu@linux.ibm.com> >> >> PR tree-optimization/83403 >> * gcc.dg/tree-ssa/pr83403-1.c: New test. >> * gcc.dg/tree-ssa/pr83403-2.c: New test. >> * gcc.dg/tree-ssa/pr83403.h: New header. >> --- >> gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c | 8 ++++++ >> gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c | 8 ++++++ >> gcc/testsuite/gcc.dg/tree-ssa/pr83403.h | 30 >> +++++++++++++++++++++++ >> gcc/tree-affine.c | 24 ++++++++++-------- >> 4 files changed, 60 insertions(+), 10 deletions(-) >> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c >> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c >> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403.h >> >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c >> b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c >> new file mode 100644 >> index 00000000000..748375b03af >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c >> @@ -0,0 +1,8 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */ >> + >> +#define TYPE unsigned int >> + >> +#include "pr83403.h" >> + >> +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 >> "lim2" } } */ >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c >> b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c >> new file mode 100644 >> index 00000000000..ca2e6bbd61c >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c >> @@ -0,0 +1,8 @@ >> +/* { dg-do compile } */ >> +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */ >> + >> +#define TYPE int >> + >> +#include "pr83403.h" >> + >> +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 >> "lim2" } } */ >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h >> b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h >> new file mode 100644 >> index 00000000000..0da8a835b5f >> --- /dev/null >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h >> @@ -0,0 +1,30 @@ >> +__attribute__ ((noinline)) void >> +calculate (const double *__restrict__ A, const double *__restrict__ >> B, >> + double *__restrict__ C) >> +{ >> + TYPE m = 0; >> + TYPE n = 0; >> + TYPE k = 0; >> + >> + A = (const double *) __builtin_assume_aligned (A, 16); >> + B = (const double *) __builtin_assume_aligned (B, 16); >> + C = (double *) __builtin_assume_aligned (C, 16); >> + >> + for (n = 0; n < 9; n++) >> + { >> + for (m = 0; m < 10; m++) >> + { >> + C[(n * 10) + m] = 0.0; >> + } >> + >> + for (k = 0; k < 17; k++) >> + { >> +#pragma simd >> + for (m = 0; m < 10; m++) >> + { >> + C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k]; >> + } >> + } >> + } >> +} >> + >> diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c >> index 0eb8db1b086..5620e6bf28f 100644 >> --- a/gcc/tree-affine.c >> +++ b/gcc/tree-affine.c >> @@ -343,24 +343,28 @@ expr_to_aff_combination (aff_tree *comb, >> tree_code code, tree type, >> wide_int minv, maxv; >> /* If inner type has wrapping overflow behavior, fold conversion >> for below case: >> - (T1)(X - CST) -> (T1)X - (T1)CST >> - if X - CST doesn't overflow by range information. Also >> handle >> - (T1)(X + CST) as (T1)(X - (-CST)). */ >> + (T1)(X *+- CST) -> (T1)X *+- (T1)CST >> + if X *+- CST doesn't overflow by range information. */ >> if (TYPE_UNSIGNED (itype) >> && TYPE_OVERFLOW_WRAPS (itype) >> - && TREE_CODE (op0) == SSA_NAME >> && TREE_CODE (op1) == INTEGER_CST >> - && icode != MULT_EXPR >> - && get_range_info (op0, &minv, &maxv) == VR_RANGE) >> + && determine_value_range (op0, &minv, &maxv) == VR_RANGE) >> { >> + wi::overflow_type overflow = wi::OVF_NONE; >> + signop sign = UNSIGNED; >> if (icode == PLUS_EXPR) >> - op1 = wide_int_to_tree (itype, -wi::to_wide (op1)); >> - if (wi::geu_p (minv, wi::to_wide (op1))) >> + wi::add (maxv, wi::to_wide (op1), sign, &overflow); >> + else if (icode == MULT_EXPR) >> + wi::mul (maxv, wi::to_wide (op1), sign, &overflow); >> + else >> + wi::sub (minv, wi::to_wide (op1), sign, &overflow); >> + >> + if (overflow == wi::OVF_NONE) >> { >> op0 = fold_convert (otype, op0); >> op1 = fold_convert (otype, op1); >> - return expr_to_aff_combination (comb, MINUS_EXPR, otype, >> - op0, op1); >> + return expr_to_aff_combination (comb, icode, otype, op0, >> + op1); >> }1 >> } >> } >>
On Mon, 11 May 2020, luoxhu wrote: > 在 2020-05-06 20:09,Richard Biener 写道: > > On Thu, 30 Apr 2020, luoxhu wrote: > > > >> Update the patch with overflow check. Bootstrap and regression tested PASS > >> on Power8-LE. > >> > >> > >> Use determine_value_range to get value range info for fold convert > >> expressions > >> with internal operation PLUS_EXPR/MINUS_EXPR/MULT_EXPR when not overflow on > >> wrapping overflow inner type. i.e.: > >> > >> (long unsigned int)((unsigned int)n * 10 + 1) > >> => > >> (long unsigned int)n * (long unsigned int)10 + (long unsigned int)1 > >> > >> With this patch for affine combination, load/store motion could detect > >> more address refs independency and promote some memory expressions to > >> registers within loop. > >> > >> PS: Replace the previous "(T1)(X + CST) as (T1)X - (T1)(-CST))" > >> to "(T1)(X + CST) as (T1)X + (T1)(CST))" for wrapping overflow. > > > > This is OK for trunk if bootstrapped / tested properl. > > > Bootstrap and regression tested pass on power8LE, committed to > r11-259-g0447929f11e6a3e1b076841712b90a8b6bc7d33a, is it necessary > to backport it to gcc-10&gcc-9? For sure not. Richard. > > Thanks, > Xionghu > > > > > > Thanks, > > Richard. > > > >> gcc/ChangeLog > >> > >> 2020-04-30 Xiong Hu Luo <luoxhu@linux.ibm.com> > >> > >> PR tree-optimization/83403 > >> * tree-affine.c (expr_to_aff_combination): Replace SSA_NAME with > >> determine_value_range, Add fold conversion of MULT_EXPR, fix the > >> previous PLUS_EXPR. > >> > >> gcc/testsuite/ChangeLog > >> > >> 2020-04-30 Xiong Hu Luo <luoxhu@linux.ibm.com> > >> > >> PR tree-optimization/83403 > >> * gcc.dg/tree-ssa/pr83403-1.c: New test. > >> * gcc.dg/tree-ssa/pr83403-2.c: New test. > >> * gcc.dg/tree-ssa/pr83403.h: New header. > >> --- > >> gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c | 8 ++++++ > >> gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c | 8 ++++++ > >> gcc/testsuite/gcc.dg/tree-ssa/pr83403.h | 30 > >> +++++++++++++++++++++++ > >> gcc/tree-affine.c | 24 ++++++++++-------- > >> 4 files changed, 60 insertions(+), 10 deletions(-) > >> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c > >> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c > >> create mode 100644 gcc/testsuite/gcc.dg/tree-ssa/pr83403.h > >> > >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c > >> b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c > >> new file mode 100644 > >> index 00000000000..748375b03af > >> --- /dev/null > >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c > >> @@ -0,0 +1,8 @@ > >> +/* { dg-do compile } */ > >> +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */ > >> + > >> +#define TYPE unsigned int > >> + > >> +#include "pr83403.h" > >> + > >> +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" > >> } } */ > >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c > >> b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c > >> new file mode 100644 > >> index 00000000000..ca2e6bbd61c > >> --- /dev/null > >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c > >> @@ -0,0 +1,8 @@ > >> +/* { dg-do compile } */ > >> +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */ > >> + > >> +#define TYPE int > >> + > >> +#include "pr83403.h" > >> + > >> +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" > >> } } */ > >> diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h > >> b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h > >> new file mode 100644 > >> index 00000000000..0da8a835b5f > >> --- /dev/null > >> +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h > >> @@ -0,0 +1,30 @@ > >> +__attribute__ ((noinline)) void > >> +calculate (const double *__restrict__ A, const double *__restrict__ B, > >> + double *__restrict__ C) > >> +{ > >> + TYPE m = 0; > >> + TYPE n = 0; > >> + TYPE k = 0; > >> + > >> + A = (const double *) __builtin_assume_aligned (A, 16); > >> + B = (const double *) __builtin_assume_aligned (B, 16); > >> + C = (double *) __builtin_assume_aligned (C, 16); > >> + > >> + for (n = 0; n < 9; n++) > >> + { > >> + for (m = 0; m < 10; m++) > >> + { > >> + C[(n * 10) + m] = 0.0; > >> + } > >> + > >> + for (k = 0; k < 17; k++) > >> + { > >> +#pragma simd > >> + for (m = 0; m < 10; m++) > >> + { > >> + C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k]; > >> + } > >> + } > >> + } > >> +} > >> + > >> diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c > >> index 0eb8db1b086..5620e6bf28f 100644 > >> --- a/gcc/tree-affine.c > >> +++ b/gcc/tree-affine.c > >> @@ -343,24 +343,28 @@ expr_to_aff_combination (aff_tree *comb, tree_code > >> code, tree type, > >> wide_int minv, maxv; > >> /* If inner type has wrapping overflow behavior, fold conversion > >> for below case: > >> - (T1)(X - CST) -> (T1)X - (T1)CST > >> - if X - CST doesn't overflow by range information. Also handle > >> - (T1)(X + CST) as (T1)(X - (-CST)). */ > >> + (T1)(X *+- CST) -> (T1)X *+- (T1)CST > >> + if X *+- CST doesn't overflow by range information. */ > >> if (TYPE_UNSIGNED (itype) > >> && TYPE_OVERFLOW_WRAPS (itype) > >> - && TREE_CODE (op0) == SSA_NAME > >> && TREE_CODE (op1) == INTEGER_CST > >> - && icode != MULT_EXPR > >> - && get_range_info (op0, &minv, &maxv) == VR_RANGE) > >> + && determine_value_range (op0, &minv, &maxv) == VR_RANGE) > >> { > >> + wi::overflow_type overflow = wi::OVF_NONE; > >> + signop sign = UNSIGNED; > >> if (icode == PLUS_EXPR) > >> - op1 = wide_int_to_tree (itype, -wi::to_wide (op1)); > >> - if (wi::geu_p (minv, wi::to_wide (op1))) > >> + wi::add (maxv, wi::to_wide (op1), sign, &overflow); > >> + else if (icode == MULT_EXPR) > >> + wi::mul (maxv, wi::to_wide (op1), sign, &overflow); > >> + else > >> + wi::sub (minv, wi::to_wide (op1), sign, &overflow); > >> + > >> + if (overflow == wi::OVF_NONE) > >> { > >> op0 = fold_convert (otype, op0); > >> op1 = fold_convert (otype, op1); > >> - return expr_to_aff_combination (comb, MINUS_EXPR, otype, > >> - op0, op1); > >> + return expr_to_aff_combination (comb, icode, otype, op0, > >> + op1); > >> }1 > >> } > >> } > >> > >
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c new file mode 100644 index 00000000000..748375b03af --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-1.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */ + +#define TYPE unsigned int + +#include "pr83403.h" + +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c new file mode 100644 index 00000000000..ca2e6bbd61c --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403-2.c @@ -0,0 +1,8 @@ +/* { dg-do compile } */ +/* { dg-options "-O3 -funroll-loops -fdump-tree-lim2-details" } */ + +#define TYPE int + +#include "pr83403.h" + +/* { dg-final { scan-tree-dump-times "Executing store motion of" 10 "lim2" } } */ diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h new file mode 100644 index 00000000000..0da8a835b5f --- /dev/null +++ b/gcc/testsuite/gcc.dg/tree-ssa/pr83403.h @@ -0,0 +1,30 @@ +__attribute__ ((noinline)) void +calculate (const double *__restrict__ A, const double *__restrict__ B, + double *__restrict__ C) +{ + TYPE m = 0; + TYPE n = 0; + TYPE k = 0; + + A = (const double *) __builtin_assume_aligned (A, 16); + B = (const double *) __builtin_assume_aligned (B, 16); + C = (double *) __builtin_assume_aligned (C, 16); + + for (n = 0; n < 9; n++) + { + for (m = 0; m < 10; m++) + { + C[(n * 10) + m] = 0.0; + } + + for (k = 0; k < 17; k++) + { +#pragma simd + for (m = 0; m < 10; m++) + { + C[(n * 10) + m] += A[(k * 20) + m] * B[(n * 20) + k]; + } + } + } +} + diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c index 0eb8db1b086..5620e6bf28f 100644 --- a/gcc/tree-affine.c +++ b/gcc/tree-affine.c @@ -343,24 +343,28 @@ expr_to_aff_combination (aff_tree *comb, tree_code code, tree type, wide_int minv, maxv; /* If inner type has wrapping overflow behavior, fold conversion for below case: - (T1)(X - CST) -> (T1)X - (T1)CST - if X - CST doesn't overflow by range information. Also handle - (T1)(X + CST) as (T1)(X - (-CST)). */ + (T1)(X *+- CST) -> (T1)X *+- (T1)CST + if X *+- CST doesn't overflow by range information. */ if (TYPE_UNSIGNED (itype) && TYPE_OVERFLOW_WRAPS (itype) - && TREE_CODE (op0) == SSA_NAME && TREE_CODE (op1) == INTEGER_CST - && icode != MULT_EXPR - && get_range_info (op0, &minv, &maxv) == VR_RANGE) + && determine_value_range (op0, &minv, &maxv) == VR_RANGE) { + wi::overflow_type overflow = wi::OVF_NONE; + signop sign = UNSIGNED; if (icode == PLUS_EXPR) - op1 = wide_int_to_tree (itype, -wi::to_wide (op1)); - if (wi::geu_p (minv, wi::to_wide (op1))) + wi::add (maxv, wi::to_wide (op1), sign, &overflow); + else if (icode == MULT_EXPR) + wi::mul (maxv, wi::to_wide (op1), sign, &overflow); + else + wi::sub (minv, wi::to_wide (op1), sign, &overflow); + + if (overflow == wi::OVF_NONE) { op0 = fold_convert (otype, op0); op1 = fold_convert (otype, op1); - return expr_to_aff_combination (comb, MINUS_EXPR, otype, - op0, op1); + return expr_to_aff_combination (comb, icode, otype, op0, + op1); }1 } }