diff mbox

Fix vect_supported_load_permutation_p (PR tree-optimization/53366)

Message ID 20120521140417.GF16117@tyan-ft48-01.lab.bos.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek May 21, 2012, 2:04 p.m. UTC
Hi!

If there are exactly 2 complex loads and some other loads in an SLP instance,
we sometimes miscompile things because vect_supported_load_permutation_p
skips important checks.

Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
trunk/4.7?

2012-05-21  Jakub Jelinek  <jakub@redhat.com>

	PR tree-optimization/53366
	* tree-vect-slp.c (vect_supported_load_permutation_p): Don't shortcut
	tests if complex_numbers == 2, but there are non-complex number loads
	too.

	* gcc.dg/torture/pr53366-1.c: New test.
	* gcc.dg/torture/pr53366-2.c: New test.
	* gcc.target/i386/pr53366-1.c: New test.
	* gcc.target/i386/pr53366-2.c: New test.


	Jakub

Comments

Richard Biener May 21, 2012, 2:09 p.m. UTC | #1
On Mon, May 21, 2012 at 4:04 PM, Jakub Jelinek <jakub@redhat.com> wrote:
> Hi!
>
> If there are exactly 2 complex loads and some other loads in an SLP instance,
> we sometimes miscompile things because vect_supported_load_permutation_p
> skips important checks.
>
> Fixed thusly, bootstrapped/regtested on x86_64-linux and i686-linux, ok for
> trunk/4.7?

Ok.

Thanks,
Richard.

> 2012-05-21  Jakub Jelinek  <jakub@redhat.com>
>
>        PR tree-optimization/53366
>        * tree-vect-slp.c (vect_supported_load_permutation_p): Don't shortcut
>        tests if complex_numbers == 2, but there are non-complex number loads
>        too.
>
>        * gcc.dg/torture/pr53366-1.c: New test.
>        * gcc.dg/torture/pr53366-2.c: New test.
>        * gcc.target/i386/pr53366-1.c: New test.
>        * gcc.target/i386/pr53366-2.c: New test.
>
> --- gcc/tree-vect-slp.c.jj      2012-04-19 11:09:13.000000000 +0200
> +++ gcc/tree-vect-slp.c 2012-05-21 12:46:40.078674688 +0200
> @@ -1199,7 +1199,8 @@ vect_supported_load_permutation_p (slp_i
>
>   /* We checked that this case ok, so there is no need to proceed with
>      permutation tests.  */
> -  if (complex_numbers == 2)
> +  if (complex_numbers == 2
> +      && VEC_length (slp_tree, SLP_INSTANCE_LOADS (slp_instn)) == 2)
>     {
>       VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
>       VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
> --- gcc/testsuite/gcc.dg/torture/pr53366-1.c.jj 2012-05-21 12:55:47.220474343 +0200
> +++ gcc/testsuite/gcc.dg/torture/pr53366-1.c    2012-05-21 12:53:40.000000000 +0200
> @@ -0,0 +1,70 @@
> +/* PR tree-optimization/53366 */
> +/* { dg-do run } */
> +
> +extern void abort (void);
> +
> +struct S { double v[3]; };
> +struct T { struct S r, i; };
> +struct U { struct T j[5]; };
> +
> +void
> +foo (struct U *__restrict p1, struct U *__restrict p2,
> +     struct S l1, struct S l2, struct S l3, struct S l4,
> +     const double _Complex * __restrict x, int y, int z)
> +{
> +  int i, j;
> +  while (y < z - 2)
> +    {
> +      for (j = 0; j < 5; ++j)
> +       {
> +         double a = __real__ x[5 * y + j];
> +         double b = __imag__ x[5 * y + j];
> +         double c = __real__ x[5 * (y + 2) + j];
> +         double d = __imag__ x[5 * (y + 2) + j];
> +         double e = __real__ x[5 * (y + 1) + j];
> +         double f = __imag__ x[5 * (y + 1) + j];
> +         double g = __real__ x[5 * (y + 3) + j];
> +         double h = __imag__ x[5 * (y + 3) + j];
> +         for (i = 0; i < 3; ++i)
> +           {
> +             p1->j[j].r.v[i] += l2.v[i] * a;
> +             p1->j[j].r.v[i] += l4.v[i] * c;
> +             p1->j[j].i.v[i] += l2.v[i] * b;
> +             p1->j[j].i.v[i] += l4.v[i] * d;
> +             p2->j[j].r.v[i] += l3.v[i] * e;
> +             p2->j[j].r.v[i] += l1.v[i] * g;
> +             p2->j[j].i.v[i] += l3.v[i] * f;
> +             p2->j[j].i.v[i] += l1.v[i] * h;
> +           }
> +       }
> +      y += 4;
> +    }
> +}
> +
> +_Complex double x[5005];
> +struct U p1, p2;
> +
> +int
> +main ()
> +{
> +  int i, j;
> +  struct S l1, l2, l3, l4;
> +  for (i = 0; i < 5005; ++i)
> +    x[i] = i + 1.0iF * (2 * i);
> +  for (i = 0; i < 3; ++i)
> +    {
> +      l1.v[i] = 1;
> +      l2.v[i] = 2;
> +      l3.v[i] = 3;
> +      l4.v[i] = 4;
> +    }
> +  foo (&p1, &p2, l1, l2, l3, l4, x, 5, 1000);
> +  for (j = 0; j < 5; ++j)
> +    for (i = 0; i < 3; ++i)
> +      if (p1.j[j].r.v[i] != 3752430 + j * 1494.0
> +         || p1.j[j].i.v[i] != p1.j[j].r.v[i] * 2
> +         || p2.j[j].r.v[i] != 2502450 + j * 996.0
> +         || p2.j[j].i.v[i] != p2.j[j].r.v[i] * 2)
> +       abort ();
> +  return 0;
> +}
> --- gcc/testsuite/gcc.dg/torture/pr53366-2.c.jj 2012-05-21 12:55:50.011459264 +0200
> +++ gcc/testsuite/gcc.dg/torture/pr53366-2.c    2012-05-21 12:54:48.000000000 +0200
> @@ -0,0 +1,43 @@
> +/* PR tree-optimization/53366 */
> +/* { dg-do run } */
> +
> +extern void abort (void);
> +
> +struct T { float r[3], i[3]; };
> +struct U { struct T j[2]; };
> +
> +void __attribute__ ((noinline))
> +foo (struct U *__restrict y, const float _Complex *__restrict x)
> +{
> +  int i, j;
> +  for (j = 0; j < 2; ++j)
> +    {
> +      float a = __real__ x[j];
> +      float b = __imag__ x[j];
> +      float c = __real__ x[j + 2];
> +      float d = __imag__ x[j + 2];
> +      for (i = 0; i < 3; ++i)
> +        {
> +          y->j[j].r[i] = y->j[j].r[i] + a + c;
> +          y->j[j].i[i] = y->j[j].i[i] + b + d;
> +        }
> +    }
> +}
> +
> +_Complex float x[4];
> +struct U y;
> +
> +int
> +main ()
> +{
> +  int i, j;
> +  for (i = 0; i < 4; ++i)
> +    x[i] = i + 1.0iF * (2 * i);
> +  foo (&y, x);
> +  for (j = 0; j < 2; ++j)
> +    for (i = 0; i < 3; ++i)
> +      if (y.j[j].r[i] != __real__ (x[j] + x[j + 2])
> +          || y.j[j].i[i] != __imag__ (x[j] + x[j + 2]))
> +        __builtin_abort ();
> +  return 0;
> +}
> --- gcc/testsuite/gcc.target/i386/pr53366-1.c.jj        2012-05-21 12:56:54.091092771 +0200
> +++ gcc/testsuite/gcc.target/i386/pr53366-1.c   2012-05-21 13:14:01.355210995 +0200
> @@ -0,0 +1,5 @@
> +/* PR tree-optimization/53366 */
> +/* { dg-do run { target avx_runtime } } */
> +/* { dg-options "-O3 -mavx" } */
> +
> +#include "../../gcc.dg/torture/pr53366-1.c"
> --- gcc/testsuite/gcc.target/i386/pr53366-2.c.jj        2012-05-21 12:56:56.868076994 +0200
> +++ gcc/testsuite/gcc.target/i386/pr53366-2.c   2012-05-21 13:14:08.358172604 +0200
> @@ -0,0 +1,5 @@
> +/* PR tree-optimization/53366 */
> +/* { dg-do run { target avx_runtime } } */
> +/* { dg-options "-O3 -mavx" } */
> +
> +#include "../../gcc.dg/torture/pr53366-2.c"
>
>        Jakub
diff mbox

Patch

--- gcc/tree-vect-slp.c.jj	2012-04-19 11:09:13.000000000 +0200
+++ gcc/tree-vect-slp.c	2012-05-21 12:46:40.078674688 +0200
@@ -1199,7 +1199,8 @@  vect_supported_load_permutation_p (slp_i
 
   /* We checked that this case ok, so there is no need to proceed with 
      permutation tests.  */
-  if (complex_numbers == 2)
+  if (complex_numbers == 2
+      && VEC_length (slp_tree, SLP_INSTANCE_LOADS (slp_instn)) == 2)
     {
       VEC_free (slp_tree, heap, SLP_INSTANCE_LOADS (slp_instn));
       VEC_free (int, heap, SLP_INSTANCE_LOAD_PERMUTATION (slp_instn));
--- gcc/testsuite/gcc.dg/torture/pr53366-1.c.jj	2012-05-21 12:55:47.220474343 +0200
+++ gcc/testsuite/gcc.dg/torture/pr53366-1.c	2012-05-21 12:53:40.000000000 +0200
@@ -0,0 +1,70 @@ 
+/* PR tree-optimization/53366 */
+/* { dg-do run } */
+
+extern void abort (void);
+
+struct S { double v[3]; };
+struct T { struct S r, i; };
+struct U { struct T j[5]; };
+
+void
+foo (struct U *__restrict p1, struct U *__restrict p2,
+     struct S l1, struct S l2, struct S l3, struct S l4,
+     const double _Complex * __restrict x, int y, int z)
+{
+  int i, j;
+  while (y < z - 2)
+    {
+      for (j = 0; j < 5; ++j)
+	{
+	  double a = __real__ x[5 * y + j];
+	  double b = __imag__ x[5 * y + j];
+	  double c = __real__ x[5 * (y + 2) + j];
+	  double d = __imag__ x[5 * (y + 2) + j];
+	  double e = __real__ x[5 * (y + 1) + j];
+	  double f = __imag__ x[5 * (y + 1) + j];
+	  double g = __real__ x[5 * (y + 3) + j];
+	  double h = __imag__ x[5 * (y + 3) + j];
+	  for (i = 0; i < 3; ++i)
+	    {
+	      p1->j[j].r.v[i] += l2.v[i] * a;
+	      p1->j[j].r.v[i] += l4.v[i] * c;
+	      p1->j[j].i.v[i] += l2.v[i] * b;
+	      p1->j[j].i.v[i] += l4.v[i] * d;
+	      p2->j[j].r.v[i] += l3.v[i] * e;
+	      p2->j[j].r.v[i] += l1.v[i] * g;
+	      p2->j[j].i.v[i] += l3.v[i] * f;
+	      p2->j[j].i.v[i] += l1.v[i] * h;
+	    }
+	}
+      y += 4;
+    }
+}
+
+_Complex double x[5005];
+struct U p1, p2;
+
+int
+main ()
+{
+  int i, j;
+  struct S l1, l2, l3, l4;
+  for (i = 0; i < 5005; ++i)
+    x[i] = i + 1.0iF * (2 * i);
+  for (i = 0; i < 3; ++i)
+    {
+      l1.v[i] = 1;
+      l2.v[i] = 2;
+      l3.v[i] = 3;
+      l4.v[i] = 4;
+    }
+  foo (&p1, &p2, l1, l2, l3, l4, x, 5, 1000);
+  for (j = 0; j < 5; ++j)
+    for (i = 0; i < 3; ++i)
+      if (p1.j[j].r.v[i] != 3752430 + j * 1494.0
+	  || p1.j[j].i.v[i] != p1.j[j].r.v[i] * 2
+	  || p2.j[j].r.v[i] != 2502450 + j * 996.0
+	  || p2.j[j].i.v[i] != p2.j[j].r.v[i] * 2)
+	abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.dg/torture/pr53366-2.c.jj	2012-05-21 12:55:50.011459264 +0200
+++ gcc/testsuite/gcc.dg/torture/pr53366-2.c	2012-05-21 12:54:48.000000000 +0200
@@ -0,0 +1,43 @@ 
+/* PR tree-optimization/53366 */
+/* { dg-do run } */
+
+extern void abort (void);
+
+struct T { float r[3], i[3]; };
+struct U { struct T j[2]; };
+
+void __attribute__ ((noinline))
+foo (struct U *__restrict y, const float _Complex *__restrict x)
+{
+  int i, j;
+  for (j = 0; j < 2; ++j)
+    {
+      float a = __real__ x[j];
+      float b = __imag__ x[j];
+      float c = __real__ x[j + 2];
+      float d = __imag__ x[j + 2];
+      for (i = 0; i < 3; ++i)
+        {
+          y->j[j].r[i] = y->j[j].r[i] + a + c;
+          y->j[j].i[i] = y->j[j].i[i] + b + d;
+        }
+    }
+}
+
+_Complex float x[4];
+struct U y;
+
+int
+main ()
+{
+  int i, j;
+  for (i = 0; i < 4; ++i)
+    x[i] = i + 1.0iF * (2 * i);
+  foo (&y, x);
+  for (j = 0; j < 2; ++j)
+    for (i = 0; i < 3; ++i)
+      if (y.j[j].r[i] != __real__ (x[j] + x[j + 2])
+          || y.j[j].i[i] != __imag__ (x[j] + x[j + 2]))
+        __builtin_abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.target/i386/pr53366-1.c.jj	2012-05-21 12:56:54.091092771 +0200
+++ gcc/testsuite/gcc.target/i386/pr53366-1.c	2012-05-21 13:14:01.355210995 +0200
@@ -0,0 +1,5 @@ 
+/* PR tree-optimization/53366 */
+/* { dg-do run { target avx_runtime } } */
+/* { dg-options "-O3 -mavx" } */
+
+#include "../../gcc.dg/torture/pr53366-1.c"
--- gcc/testsuite/gcc.target/i386/pr53366-2.c.jj	2012-05-21 12:56:56.868076994 +0200
+++ gcc/testsuite/gcc.target/i386/pr53366-2.c	2012-05-21 13:14:08.358172604 +0200
@@ -0,0 +1,5 @@ 
+/* PR tree-optimization/53366 */
+/* { dg-do run { target avx_runtime } } */
+/* { dg-options "-O3 -mavx" } */
+
+#include "../../gcc.dg/torture/pr53366-2.c"