Message ID | 20120521140417.GF16117@tyan-ft48-01.lab.bos.redhat.com |
---|---|
State | New |
Headers | show |
On Mon, May 21, 2012 at 4:04 PM, Jakub Jelinek <jakub@redhat.com> wrote: > Hi! > > If there are exactly 2 complex loads and some other loads in SLP instance, > we sometimes miscompile things because vect_supported_load_permutation_p > skips important checks. > > Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for > trunk/4.7? Ok. Thanks, Richard. > 2012-05-21 Jakub Jelinek <jakub@redhat.com> > > PR tree-optimization/53366 > * tree-vect-slp.c (vect_supported_load_permutation_p): Don't shortcut > tests if complex_numbers == 2, but there are non-complex number loads > too. > > * gcc.dg/torture/pr53366-1.c: New test. > * gcc.dg/torture/pr53366-2.c: New test. > * gcc.target/i386/pr53366-1.c: New test. > * gcc.target/i386/pr53366-2.c: New test. > > --- gcc/tree-vect-slp.c.jj 2012-04-19 11:09:13.000000000 +0200 > +++ gcc/tree-vect-slp.c 2012-05-21 12:46:40.078674688 +0200 > @@ -1199,7 +1199,8 @@ vect_supported_load_permutation_p (slp_i > > /* We checked that this case ok, so there is no need to proceed with > permutation tests. */ > - if (complex_numbers == 2) > + if (complex_numbers == 2 > + && VEC_length (slp_tree, SLP_INSTANCE_LOADS (slp_instn)) == 2) > { > VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn)); > VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn)); > --- gcc/testsuite/gcc.dg/torture/pr53366-1.c.jj 2012-05-21 12:55:47.220474343 +0200 > +++ gcc/testsuite/gcc.dg/torture/pr53366-1.c 2012-05-21 12:53:40.000000000 +0200 > @@ -0,0 +1,70 @@ > +/* PR tree-optimization/53366 */ > +/* { dg-do run } */ > + > +extern void abort (void); > + > +struct S { double v[3]; }; > +struct T { struct S r, i; }; > +struct U { struct T j[5]; }; > + > +void > +foo (struct U *__restrict p1, struct U *__restrict p2, > + struct S l1, struct S l2, struct S l3, struct S l4, > + const double _Complex * __restrict x, int y, int z) > +{ > + int i, j; > + while (y < z - 2) > + { > + for (j = 0; j < 5; ++j) > + { > + double a = __real__ x[5 * y + j]; > + double b = __imag__ x[5 * y + j]; > + double c = __real__ x[5 * (y + 2) + j]; > + double d = __imag__ x[5 * (y + 2) + j]; > + double e = __real__ x[5 * (y + 1) + j]; > + double f = __imag__ x[5 * (y + 1) + j]; > + double g = __real__ x[5 * (y + 3) + j]; > + double h = __imag__ x[5 * (y + 3) + j]; > + for (i = 0; i < 3; ++i) > + { > + p1->j[j].r.v[i] += l2.v[i] * a; > + p1->j[j].r.v[i] += l4.v[i] * c; > + p1->j[j].i.v[i] += l2.v[i] * b; > + p1->j[j].i.v[i] += l4.v[i] * d; > + p2->j[j].r.v[i] += l3.v[i] * e; > + p2->j[j].r.v[i] += l1.v[i] * g; > + p2->j[j].i.v[i] += l3.v[i] * f; > + p2->j[j].i.v[i] += l1.v[i] * h; > + } > + } > + y += 4; > + } > +} > + > +_Complex double x[5005]; > +struct U p1, p2; > + > +int > +main () > +{ > + int i, j; > + struct S l1, l2, l3, l4; > + for (i = 0; i < 5005; ++i) > + x[i] = i + 1.0iF * (2 * i); > + for (i = 0; i < 3; ++i) > + { > + l1.v[i] = 1; > + l2.v[i] = 2; > + l3.v[i] = 3; > + l4.v[i] = 4; > + } > + foo (&p1, &p2, l1, l2, l3, l4, x, 5, 1000); > + for (j = 0; j < 5; ++j) > + for (i = 0; i < 3; ++i) > + if (p1.j[j].r.v[i] != 3752430 + j * 1494.0 > + || p1.j[j].i.v[i] != p1.j[j].r.v[i] * 2 > + || p2.j[j].r.v[i] != 2502450 + j * 996.0 > + || p2.j[j].i.v[i] != p2.j[j].r.v[i] * 2) > + abort (); > + return 0; > +} > --- gcc/testsuite/gcc.dg/torture/pr53366-2.c.jj 2012-05-21 12:55:50.011459264 +0200 > +++ gcc/testsuite/gcc.dg/torture/pr53366-2.c 2012-05-21 12:54:48.000000000 +0200 > @@ -0,0 +1,43 @@ > +/* PR tree-optimization/53366 */ > +/* { dg-do run } */ > + > +extern void abort (void); > + > +struct T { float r[3], i[3]; }; > +struct U { struct T j[2]; }; > + > +void __attribute__ ((noinline)) > +foo (struct U *__restrict y, const float _Complex *__restrict x) > +{ > + int i, j; > + for (j = 0; j < 2; ++j) > + { > + float a = __real__ x[j]; > + float b = __imag__ x[j]; > + float c = __real__ x[j + 2]; > + float d = __imag__ x[j + 2]; > + for (i = 0; i < 3; ++i) > + { > + y->j[j].r[i] = y->j[j].r[i] + a + c; > + y->j[j].i[i] = y->j[j].i[i] + b + d; > + } > + } > +} > + > +_Complex float x[4]; > +struct U y; > + > +int > +main () > +{ > + int i, j; > + for (i = 0; i < 4; ++i) > + x[i] = i + 1.0iF * (2 * i); > + foo (&y, x); > + for (j = 0; j < 2; ++j) > + for (i = 0; i < 3; ++i) > + if (y.j[j].r[i] != __real__ (x[j] + x[j + 2]) > + || y.j[j].i[i] != __imag__ (x[j] + x[j + 2])) > + __builtin_abort (); > + return 0; > +} > --- gcc/testsuite/gcc.target/i386/pr53366-1.c.jj 2012-05-21 12:56:54.091092771 +0200 > +++ gcc/testsuite/gcc.target/i386/pr53366-1.c 2012-05-21 13:14:01.355210995 +0200 > @@ -0,0 +1,5 @@ > +/* PR tree-optimization/53366 */ > +/* { dg-do run { target avx_runtime } } */ > +/* { dg-options "-O3 -mavx" } */ > + > +#include "../../gcc.dg/torture/pr53366-1.c" > --- gcc/testsuite/gcc.target/i386/pr53366-2.c.jj 2012-05-21 12:56:56.868076994 +0200 > +++ gcc/testsuite/gcc.target/i386/pr53366-2.c 2012-05-21 13:14:08.358172604 +0200 > @@ -0,0 +1,5 @@ > +/* PR tree-optimization/53366 */ > +/* { dg-do run { target avx_runtime } } */ > +/* { dg-options "-O3 -mavx" } */ > + > +#include "../../gcc.dg/torture/pr53366-2.c" > > Jakub
--- gcc/tree-vect-slp.c.jj 2012-04-19 11:09:13.000000000 +0200 +++ gcc/tree-vect-slp.c 2012-05-21 12:46:40.078674688 +0200 @@ -1199,7 +1199,8 @@ vect_supported_load_permutation_p (slp_i /* We checked that this case ok, so there is no need to proceed with permutation tests. */ - if (complex_numbers == 2) + if (complex_numbers == 2 + && VEC_length (slp_tree, SLP_INSTANCE_LOADS (slp_instn)) == 2) { VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn)); VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn)); --- gcc/testsuite/gcc.dg/torture/pr53366-1.c.jj 2012-05-21 12:55:47.220474343 +0200 +++ gcc/testsuite/gcc.dg/torture/pr53366-1.c 2012-05-21 12:53:40.000000000 +0200 @@ -0,0 +1,70 @@ +/* PR tree-optimization/53366 */ +/* { dg-do run } */ + +extern void abort (void); + +struct S { double v[3]; }; +struct T { struct S r, i; }; +struct U { struct T j[5]; }; + +void +foo (struct U *__restrict p1, struct U *__restrict p2, + struct S l1, struct S l2, struct S l3, struct S l4, + const double _Complex * __restrict x, int y, int z) +{ + int i, j; + while (y < z - 2) + { + for (j = 0; j < 5; ++j) + { + double a = __real__ x[5 * y + j]; + double b = __imag__ x[5 * y + j]; + double c = __real__ x[5 * (y + 2) + j]; + double d = __imag__ x[5 * (y + 2) + j]; + double e = __real__ x[5 * (y + 1) + j]; + double f = __imag__ x[5 * (y + 1) + j]; + double g = __real__ x[5 * (y + 3) + j]; + double h = __imag__ x[5 * (y + 3) + j]; + for (i = 0; i < 3; ++i) + { + p1->j[j].r.v[i] += l2.v[i] * a; + p1->j[j].r.v[i] += l4.v[i] * c; + p1->j[j].i.v[i] += l2.v[i] * b; + p1->j[j].i.v[i] += l4.v[i] * d; + p2->j[j].r.v[i] += l3.v[i] * e; + p2->j[j].r.v[i] += l1.v[i] * g; + p2->j[j].i.v[i] += l3.v[i] * f; + p2->j[j].i.v[i] += l1.v[i] * h; + } + } + y += 4; + } +} + +_Complex double x[5005]; +struct U p1, p2; + +int +main () +{ + int i, j; + struct S l1, l2, l3, l4; + for (i = 0; i < 5005; ++i) + x[i] = i + 1.0iF * (2 * i); + for (i = 0; i < 3; ++i) + { + l1.v[i] = 1; + l2.v[i] = 2; + l3.v[i] = 3; + l4.v[i] = 4; + } + foo (&p1, &p2, l1, l2, l3, l4, x, 5, 1000); + for (j = 0; j < 5; ++j) + for (i = 0; i < 3; ++i) + if (p1.j[j].r.v[i] != 3752430 + j * 1494.0 + || p1.j[j].i.v[i] != p1.j[j].r.v[i] * 2 + || p2.j[j].r.v[i] != 2502450 + j * 996.0 + || p2.j[j].i.v[i] != p2.j[j].r.v[i] * 2) + abort (); + return 0; +} --- gcc/testsuite/gcc.dg/torture/pr53366-2.c.jj 2012-05-21 12:55:50.011459264 +0200 +++ gcc/testsuite/gcc.dg/torture/pr53366-2.c 2012-05-21 12:54:48.000000000 +0200 @@ -0,0 +1,43 @@ +/* PR tree-optimization/53366 */ +/* { dg-do run } */ + +extern void abort (void); + +struct T { float r[3], i[3]; }; +struct U { struct T j[2]; }; + +void __attribute__ ((noinline)) +foo (struct U *__restrict y, const float _Complex *__restrict x) +{ + int i, j; + for (j = 0; j < 2; ++j) + { + float a = __real__ x[j]; + float b = __imag__ x[j]; + float c = __real__ x[j + 2]; + float d = __imag__ x[j + 2]; + for (i = 0; i < 3; ++i) + { + y->j[j].r[i] = y->j[j].r[i] + a + c; + y->j[j].i[i] = y->j[j].i[i] + b + d; + } + } +} + +_Complex float x[4]; +struct U y; + +int +main () +{ + int i, j; + for (i = 0; i < 4; ++i) + x[i] = i + 1.0iF * (2 * i); + foo (&y, x); + for (j = 0; j < 2; ++j) + for (i = 0; i < 3; ++i) + if (y.j[j].r[i] != __real__ (x[j] + x[j + 2]) + || y.j[j].i[i] != __imag__ (x[j] + x[j + 2])) + __builtin_abort (); + return 0; +} --- gcc/testsuite/gcc.target/i386/pr53366-1.c.jj 2012-05-21 12:56:54.091092771 +0200 +++ gcc/testsuite/gcc.target/i386/pr53366-1.c 2012-05-21 13:14:01.355210995 +0200 @@ -0,0 +1,5 @@ +/* PR tree-optimization/53366 */ +/* { dg-do run { target avx_runtime } } */ +/* { dg-options "-O3 -mavx" } */ + +#include "../../gcc.dg/torture/pr53366-1.c" --- gcc/testsuite/gcc.target/i386/pr53366-2.c.jj 2012-05-21 12:56:56.868076994 +0200 +++ gcc/testsuite/gcc.target/i386/pr53366-2.c 2012-05-21 13:14:08.358172604 +0200 @@ -0,0 +1,5 @@ +/* PR tree-optimization/53366 */ +/* { dg-do run { target avx_runtime } } */ +/* { dg-options "-O3 -mavx" } */ + +#include "../../gcc.dg/torture/pr53366-2.c"