diff mbox

Re: vector shift regression on sparc

Message ID 20111031095305.GZ1052@tyan-ft48-01.lab.bos.redhat.com
State New
Headers show

Commit Message

Jakub Jelinek Oct. 31, 2011, 9:53 a.m. UTC
On Sun, Oct 30, 2011 at 12:38:32AM -0400, David Miller wrote:
> gcc.dg/pr48616.c segfaults on sparc as of a day or two ago
> 
> vectorizable_shift() crashes because op1_vectype is NULL and
> we hit this code path:
> 
>   /* Vector shifted by vector.  */
>   if (!scalar_shift_arg)
>     {
>       optab = optab_for_tree_code (code, vectype, optab_vector);
>       if (vect_print_dump_info (REPORT_DETAILS))
> 	fprintf (vect_dump, "vector/vector shift/rotate found.");
> =>    if (TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
> 
> dt[1] is vect_external_def and slp_node is non-NULL.
> 
> Indeed, when the 'dt' arg to vect_is_simple_use_1() is
> vect_external_def *vectype will be set to NULL.

Here is a fix for that (and other issues that show up on these
testcases with -O3 -mxop if I disable all vector/scalar shift expanders
in sse.md).
For SLP it currently gives up more often than for loop vectorization,
I assume we could handle all dt[1] == vect_constant_def
and dt[2] == vect_external_def cases for SLP (and at least the former
even if the constants differ between nodes) by building the vectors by hand,
though the current vect_get_vec_defs/vect_get_vec_defs_for_stmt_copy can't
be used for that as is.

2011-10-28  Jakub Jelinek  <jakub@redhat.com>

	* tree-vect-stmts.c (vectorizable_shift): If op1 is vect_external_def
	in a loop and has different type from op0, cast it to op0's type
	before the loop first.  For slp give up.  Don't crash if op1_vectype
	is NULL.

	* gcc.dg/vshift-3.c: New test.
	* gcc.dg/vshift-4.c: New test.
	* gcc.dg/vshift-5.c: New test.



	Jakub

Comments

Ira Rosen Oct. 31, 2011, 11:14 a.m. UTC | #1
On 31 October 2011 11:53, Jakub Jelinek <jakub@redhat.com> wrote:
> On Sun, Oct 30, 2011 at 12:38:32AM -0400, David Miller wrote:
>> gcc.dg/pr48616.c segfaults on sparc as of a day or two ago
>>
>> vectorizable_shift() crashes because op1_vectype is NULL and
>> we hit this code path:
>>
>>   /* Vector shifted by vector.  */
>>   if (!scalar_shift_arg)
>>     {
>>       optab = optab_for_tree_code (code, vectype, optab_vector);
>>       if (vect_print_dump_info (REPORT_DETAILS))
>>       fprintf (vect_dump, "vector/vector shift/rotate found.");
>> =>    if (TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
>>
>> dt[1] is vect_external_def and slp_node is non-NULL.
>>
>> Indeed, when the 'dt' arg to vect_is_simple_use_1() is
>> vect_external_def *vectype will be set to NULL.
>
> Here is a fix for that (and other issues that show up on these
> testcases with -O3 -mxop if I disable all vector/scalar shift expanders
> in sse.md).
> For SLP it currently gives up more often than for loop vectorization,
> I assume we could handle all dt[1] == vect_constant_def
> and dt[2] == vect_external_def cases for SLP (and at least the former
> even if the constants differ between nodes) by building the vectors by hand,
> though the current vect_get_vec_defs/vect_get_vec_defs_for_stmt_copy can't
> be used for that as is.
>
> 2011-10-28  Jakub Jelinek  <jakub@redhat.com>
>
>        * tree-vect-stmts.c (vectorizable_shift): If op1 is vect_external_def
>        in a loop and has different type from op0, cast it to op0's type
>        before the loop first.  For slp give up.  Don't crash if op1_vectype
>        is NULL.
>
>        * gcc.dg/vshift-3.c: New test.
>        * gcc.dg/vshift-4.c: New test.
>        * gcc.dg/vshift-5.c: New test.
>
> --- gcc/tree-vect-stmts.c.jj    2011-10-28 16:21:06.000000000 +0200
> +++ gcc/tree-vect-stmts.c       2011-10-31 10:27:57.000000000 +0100
> @@ -2446,7 +2446,10 @@ vectorizable_shift (gimple stmt, gimple_
>       optab = optab_for_tree_code (code, vectype, optab_vector);
>       if (vect_print_dump_info (REPORT_DETAILS))
>         fprintf (vect_dump, "vector/vector shift/rotate found.");
> -      if (TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
> +      if (!op1_vectype)
> +       op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
> +      if (op1_vectype == NULL_TREE
> +         || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
>        {
>          if (vect_print_dump_info (REPORT_DETAILS))
>            fprintf (vect_dump, "unusable type for last operand in"
> @@ -2480,9 +2483,28 @@ vectorizable_shift (gimple stmt, gimple_
>               /* Unlike the other binary operators, shifts/rotates have
>                  the rhs being int, instead of the same type as the lhs,
>                  so make sure the scalar is the right type if we are
> -                 dealing with vectors of short/char.  */
> +                dealing with vectors of long long/long/short/char.  */
>               if (dt[1] == vect_constant_def)
>                 op1 = fold_convert (TREE_TYPE (vectype), op1);
> +             else if (!useless_type_conversion_p (TREE_TYPE (vectype),
> +                                                  TREE_TYPE (op1)))

What happens in case dt[1] == vect_internal_def?

Thanks,
Ira

> +               {
> +                 if (slp_node
> +                     && TYPE_MODE (TREE_TYPE (vectype))
> +                        != TYPE_MODE (TREE_TYPE (op1)))
> +                   {
> +                     if (vect_print_dump_info (REPORT_DETAILS))
> +                     fprintf (vect_dump, "unusable type for last operand in"
> +                                         " vector/vector shift/rotate.");
> +                       return false;
> +                   }
> +                 if (vec_stmt && !slp_node)
> +                   {
> +                     op1 = fold_convert (TREE_TYPE (vectype), op1);
> +                     op1 = vect_init_vector (stmt, op1,
> +                                             TREE_TYPE (vectype), NULL);
> +                   }
> +               }
>             }
>         }
>     }

>
>
>        Jakub
>
Jakub Jelinek Oct. 31, 2011, 11:23 a.m. UTC | #2
On Mon, Oct 31, 2011 at 01:14:25PM +0200, Ira Rosen wrote:
> > --- gcc/tree-vect-stmts.c.jj    2011-10-28 16:21:06.000000000 +0200
> > +++ gcc/tree-vect-stmts.c       2011-10-31 10:27:57.000000000 +0100
> > @@ -2446,7 +2446,10 @@ vectorizable_shift (gimple stmt, gimple_
> >       optab = optab_for_tree_code (code, vectype, optab_vector);
> >       if (vect_print_dump_info (REPORT_DETAILS))
> >         fprintf (vect_dump, "vector/vector shift/rotate found.");
> > -      if (TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
> > +      if (!op1_vectype)
> > +       op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
> > +      if (op1_vectype == NULL_TREE
> > +         || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
> >        {
> >          if (vect_print_dump_info (REPORT_DETAILS))
> >            fprintf (vect_dump, "unusable type for last operand in"
> > @@ -2480,9 +2483,28 @@ vectorizable_shift (gimple stmt, gimple_
> >               /* Unlike the other binary operators, shifts/rotates have
> >                  the rhs being int, instead of the same type as the lhs,
> >                  so make sure the scalar is the right type if we are
> > -                 dealing with vectors of short/char.  */
> > +                dealing with vectors of long long/long/short/char.  */
> >               if (dt[1] == vect_constant_def)
> >                 op1 = fold_convert (TREE_TYPE (vectype), op1);
> > +             else if (!useless_type_conversion_p (TREE_TYPE (vectype),
> > +                                                  TREE_TYPE (op1)))
> 
> What happens in case dt[1] == vect_internal_def?

For !slp_node we can't reach this with dt1[1] == vect_internal_def,
because of:
  if (dt[1] == vect_internal_def && !slp_node)
    scalar_shift_arg = false;
And for slp_node I'm just giving up if type modes don't match:

> > +               {
> > +                 if (slp_node
> > +                     && TYPE_MODE (TREE_TYPE (vectype))
> > +                        != TYPE_MODE (TREE_TYPE (op1)))
> > +                   {
> > +                     if (vect_print_dump_info (REPORT_DETAILS))
> > +                     fprintf (vect_dump, "unusable type for last operand in"
> > +                                         " vector/vector shift/rotate.");
> > +                       return false;
> > +                   }

BTW, even the pre-existing if (dt[1] == vect_constant_def) doesn't seem to
be 100% correct for slp_node != NULL, I think vect_get_constant_vectors
will in that case create a VECTOR_CST with the desirable vector type
(same type mode as op0's vector type mode), but the constants in the
VECTOR_CST will have a wrong type (say V4DImode VECTOR_CST with
SImode constants in its constructor).  The expander doesn't ICE on it
though.

	Jakub
Ira Rosen Oct. 31, 2011, 11:36 a.m. UTC | #3
On 31 October 2011 13:23, Jakub Jelinek <jakub@redhat.com> wrote:
> On Mon, Oct 31, 2011 at 01:14:25PM +0200, Ira Rosen wrote:
>> > --- gcc/tree-vect-stmts.c.jj    2011-10-28 16:21:06.000000000 +0200
>> > +++ gcc/tree-vect-stmts.c       2011-10-31 10:27:57.000000000 +0100
>> > @@ -2446,7 +2446,10 @@ vectorizable_shift (gimple stmt, gimple_
>> >       optab = optab_for_tree_code (code, vectype, optab_vector);
>> >       if (vect_print_dump_info (REPORT_DETAILS))
>> >         fprintf (vect_dump, "vector/vector shift/rotate found.");
>> > -      if (TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
>> > +      if (!op1_vectype)
>> > +       op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
>> > +      if (op1_vectype == NULL_TREE
>> > +         || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
>> >        {
>> >          if (vect_print_dump_info (REPORT_DETAILS))
>> >            fprintf (vect_dump, "unusable type for last operand in"
>> > @@ -2480,9 +2483,28 @@ vectorizable_shift (gimple stmt, gimple_
>> >               /* Unlike the other binary operators, shifts/rotates have
>> >                  the rhs being int, instead of the same type as the lhs,
>> >                  so make sure the scalar is the right type if we are
>> > -                 dealing with vectors of short/char.  */
>> > +                dealing with vectors of long long/long/short/char.  */
>> >               if (dt[1] == vect_constant_def)
>> >                 op1 = fold_convert (TREE_TYPE (vectype), op1);
>> > +             else if (!useless_type_conversion_p (TREE_TYPE (vectype),
>> > +                                                  TREE_TYPE (op1)))
>>
>> What happens in case dt[1] == vect_internal_def?
>
> For !slp_node we can't reach this with dt1[1] == vect_internal_def,
> because of:
>  if (dt[1] == vect_internal_def && !slp_node)
>    scalar_shift_arg = false;
> And for slp_node I'm just giving up if type modes don't match:
>
>> > +               {
>> > +                 if (slp_node
>> > +                     && TYPE_MODE (TREE_TYPE (vectype))
>> > +                        != TYPE_MODE (TREE_TYPE (op1)))
>> > +                   {
>> > +                     if (vect_print_dump_info (REPORT_DETAILS))
>> > +                     fprintf (vect_dump, "unusable type for last operand in"
>> > +                                         " vector/vector shift/rotate.");
>> > +                       return false;
>> > +                   }
>

Ah, OK.

> BTW, even the pre-existing if (dt[1] == vect_constant_def) doesn't seem to
> be 100% correct for slp_node != NULL, I think vect_get_constant_vectors
> will in that case create a VECTOR_CST with the desirable vector type
> (same type mode as op0's vector type mode), but the constants in the
> VECTOR_CST will have a wrong type (say V4DImode VECTOR_CST with
> SImode constants in its constructor).  The expander doesn't ICE on it
> though.

Right. As you wrote before, we should probably change shift vectors
creation for SLP.

The patch is OK.

Thanks,
Ira

>
>        Jakub
>
diff mbox

Patch

--- gcc/tree-vect-stmts.c.jj	2011-10-28 16:21:06.000000000 +0200
+++ gcc/tree-vect-stmts.c	2011-10-31 10:27:57.000000000 +0100
@@ -2446,7 +2446,10 @@  vectorizable_shift (gimple stmt, gimple_
       optab = optab_for_tree_code (code, vectype, optab_vector);
       if (vect_print_dump_info (REPORT_DETAILS))
         fprintf (vect_dump, "vector/vector shift/rotate found.");
-      if (TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
+      if (!op1_vectype)
+	op1_vectype = get_same_sized_vectype (TREE_TYPE (op1), vectype_out);
+      if (op1_vectype == NULL_TREE
+	  || TYPE_MODE (op1_vectype) != TYPE_MODE (vectype))
 	{
 	  if (vect_print_dump_info (REPORT_DETAILS))
 	    fprintf (vect_dump, "unusable type for last operand in"
@@ -2480,9 +2483,28 @@  vectorizable_shift (gimple stmt, gimple_
               /* Unlike the other binary operators, shifts/rotates have
                  the rhs being int, instead of the same type as the lhs,
                  so make sure the scalar is the right type if we are
-                 dealing with vectors of short/char.  */
+		 dealing with vectors of long long/long/short/char.  */
               if (dt[1] == vect_constant_def)
                 op1 = fold_convert (TREE_TYPE (vectype), op1);
+	      else if (!useless_type_conversion_p (TREE_TYPE (vectype),
+						   TREE_TYPE (op1)))
+		{
+		  if (slp_node
+		      && TYPE_MODE (TREE_TYPE (vectype))
+			 != TYPE_MODE (TREE_TYPE (op1)))
+		    {
+		      if (vect_print_dump_info (REPORT_DETAILS))
+		      fprintf (vect_dump, "unusable type for last operand in"
+					  " vector/vector shift/rotate.");
+			return false;
+		    }
+		  if (vec_stmt && !slp_node)
+		    {
+		      op1 = fold_convert (TREE_TYPE (vectype), op1);
+		      op1 = vect_init_vector (stmt, op1,
+					      TREE_TYPE (vectype), NULL);
+		    }
+		}
             }
         }
     }
--- gcc/testsuite/gcc.dg/vshift-3.c.jj	2011-10-31 10:00:57.000000000 +0100
+++ gcc/testsuite/gcc.dg/vshift-3.c	2011-10-31 10:00:42.000000000 +0100
@@ -0,0 +1,136 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#include <stdlib.h>
+
+#define N 64
+
+#ifndef TYPE1
+#define TYPE1 int
+#define TYPE2 long long
+#endif
+
+signed TYPE1 a[N], b, g[N];
+unsigned TYPE1 c[N], h[N];
+signed TYPE2 d[N], e, j[N];
+unsigned TYPE2 f[N], k[N];
+
+#ifndef S
+#define S(x) x
+#endif
+
+__attribute__((noinline)) void
+f1 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    g[i] = a[i] << S (b);
+}
+
+__attribute__((noinline)) void
+f2 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    g[i] = a[i] >> S (b);
+}
+
+__attribute__((noinline)) void
+f3 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    h[i] = c[i] >> S (b);
+}
+
+__attribute__((noinline)) void
+f4 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    j[i] = d[i] << S (e);
+}
+
+__attribute__((noinline)) void
+f5 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    j[i] = d[i] >> S (e);
+}
+
+__attribute__((noinline)) void
+f6 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    k[i] = f[i] >> S (e);
+}
+
+__attribute__((noinline)) void
+f7 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    j[i] = d[i] << S (b);
+}
+
+__attribute__((noinline)) void
+f8 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    j[i] = d[i] >> S (b);
+}
+
+__attribute__((noinline)) void
+f9 (void)
+{
+  int i;
+  for (i = 0; i < N; i++)
+    k[i] = f[i] >> S (b);
+}
+
+int
+main ()
+{
+  int i;
+  b = 7;
+  e = 12;
+  for (i = 0; i < N; i++)
+    {
+      asm ("");
+      c[i] = (random () << 1) | (random () & 1);
+      a[i] = c[i];
+      d[i] = (random () << 1) | (random () & 1);
+      d[i] |= (unsigned long long) c[i] << 32;
+      f[i] = d[i];
+    }
+  f1 ();
+  f3 ();
+  f4 ();
+  f6 ();
+  for (i = 0; i < N; i++)
+    if (g[i] != (signed TYPE1) (a[i] << S (b))
+	|| h[i] != (unsigned TYPE1) (c[i] >> S (b))
+	|| j[i] != (signed TYPE2) (d[i] << S (e))
+	|| k[i] != (unsigned TYPE2) (f[i] >> S (e)))
+      abort ();
+  f2 ();
+  f5 ();
+  f9 ();
+  for (i = 0; i < N; i++)
+    if (g[i] != (signed TYPE1) (a[i] >> S (b))
+	|| j[i] != (signed TYPE2) (d[i] >> S (e))
+	|| k[i] != (unsigned TYPE2) (f[i] >> S (b)))
+      abort ();
+  f7 ();
+  for (i = 0; i < N; i++)
+    if (j[i] != (signed TYPE2) (d[i] << S (b)))
+      abort ();
+  f8 ();
+  for (i = 0; i < N; i++)
+    if (j[i] != (signed TYPE2) (d[i] >> S (b)))
+      abort ();
+  return 0;
+}
--- gcc/testsuite/gcc.dg/vshift-4.c.jj	2011-10-31 10:01:08.000000000 +0100
+++ gcc/testsuite/gcc.dg/vshift-4.c	2011-10-31 10:01:22.000000000 +0100
@@ -0,0 +1,6 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+#define S(x) 3
+
+#include "vshift-3.c"
--- gcc/testsuite/gcc.dg/vshift-5.c.jj	2011-10-31 10:33:09.000000000 +0100
+++ gcc/testsuite/gcc.dg/vshift-5.c	2011-10-31 10:32:57.000000000 +0100
@@ -0,0 +1,80 @@ 
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+extern void abort (void);
+long long a[16];
+
+__attribute__((noinline, noclone)) void
+f1 (void)
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << 2;
+  a1 = a1 << 3;
+  a2 = a2 << 4;
+  a3 = a3 << 5;
+  a[0] = a0;
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+__attribute__((noinline, noclone)) void
+f2 (void)
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << 2;
+  a1 = a1 << 2;
+  a2 = a2 << 2;
+  a3 = a3 << 2;
+  a[0] = a0;
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+__attribute__((noinline, noclone)) void
+f3 (int x)
+{
+  long long a0, a1, a2, a3;
+  a0 = a[0];
+  a1 = a[1];
+  a2 = a[2];
+  a3 = a[3];
+  a0 = a0 << x;
+  a1 = a1 << x;
+  a2 = a2 << x;
+  a3 = a3 << x;
+  a[0] = a0;
+  a[1] = a1;
+  a[2] = a2;
+  a[3] = a3;
+}
+
+int
+main ()
+{
+  a[0] = 4LL;
+  a[1] = 3LL;
+  a[2] = 2LL;
+  a[3] = 1LL;
+  f1 ();
+  if (a[0] != (4LL << 2) || a[1] != (3LL << 3)
+      || a[2] != (2LL << 4) || a[3] != (1LL << 5))
+    abort ();
+  f2 ();
+  if (a[0] != (4LL << 4) || a[1] != (3LL << 5)
+      || a[2] != (2LL << 6) || a[3] != (1LL << 7))
+    abort ();
+  f3 (3);
+  if (a[0] != (4LL << 7) || a[1] != (3LL << 8)
+      || a[2] != (2LL << 9) || a[3] != (1LL << 10))
+    abort ();
+}