Prefer scalar offset in vector shifts

Submitted by Marc Glisse on May 12, 2013, 12:04 p.m.

Details

Message ID alpine.DEB.2.02.1305121354060.8343@stedding.saclay.inria.fr
State New
Headers show

Commit Message

Marc Glisse May 12, 2013, 12:04 p.m.
Hello,

this patch passes bootstrap+testsuite on x86_64-linux-gnu. When moving 
uniform_vector_p, I only added the gcc_assert. Note that the fold_binary 
patch helps for constant vectors, but not for {n,n,n,n}, which will 
require some help in forwprop for instance. This transformation is already 
done by the vector lowering pass, but that's too late in my opinion.

2013-05-13  Marc Glisse  <marc.glisse@inria.fr>

gcc/
 	* tree-vect-generic.c (uniform_vector_p): Move ...
 	* tree.c (uniform_vector_p): ... here.
 	* tree.h (uniform_vector_p): Declare it.
 	* fold-const.c (fold_binary_loc) <shift>: Turn the second argument
 	into a scalar.

gcc/testsuite/
 	* gcc.dg/vector-shift-2.c: New testcase.

Comments

Richard Guenther May 13, 2013, 8:01 a.m.
On Sun, May 12, 2013 at 2:04 PM, Marc Glisse <marc.glisse@inria.fr> wrote:
> Hello,
>
> this patch passes bootstrap+testsuite on x86_64-linux-gnu. When moving
> uniform_vector_p, I only added the gcc_assert. Note that the fold_binary
> patch helps for constant vectors, but not for {n,n,n,n}, which will require
> some help in forwprop for instance. This transformation is already done by
> the vector lowering pass, but that's too late in my opinion.

Ok.

Thanks,
Richard.

> 2013-05-13  Marc Glisse  <marc.glisse@inria.fr>
>
> gcc/
>         * tree-vect-generic.c (uniform_vector_p): Move ...
>         * tree.c (uniform_vector_p): ... here.
>         * tree.h (uniform_vector_p): Declare it.
>         * fold-const.c (fold_binary_loc) <shift>: Turn the second argument
>         into a scalar.
>
> gcc/testsuite/
>         * gcc.dg/vector-shift-2.c: New testcase.
>
> --
> Marc Glisse
> Index: gcc/testsuite/gcc.dg/vector-shift-2.c
> ===================================================================
> --- gcc/testsuite/gcc.dg/vector-shift-2.c       (revision 0)
> +++ gcc/testsuite/gcc.dg/vector-shift-2.c       (revision 0)
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-fdump-tree-ccp1" } */
> +
> +typedef unsigned vec __attribute__ ((vector_size (16)));
> +void
> +f (vec *a)
> +{
> +  vec s = { 5, 5, 5, 5 };
> +  *a = *a << s;
> +}
> +
> +/* { dg-final { scan-tree-dump "<< 5" "ccp1" } } */
> +/* { dg-final { cleanup-tree-dump "ccp1" } } */
>
> Property changes on: gcc/testsuite/gcc.dg/vector-shift-2.c
> ___________________________________________________________________
> Added: svn:keywords
>    + Author Date Id Revision URL
> Added: svn:eol-style
>    + native
>
> Index: gcc/tree-vect-generic.c
> ===================================================================
> --- gcc/tree-vect-generic.c     (revision 198803)
> +++ gcc/tree-vect-generic.c     (working copy)
> @@ -319,66 +319,20 @@ expand_vector_addition (gimple_stmt_iter
>        && parts_per_word >= 4
>        && TYPE_VECTOR_SUBPARTS (type) >= 4)
>      return expand_vector_parallel (gsi, f_parallel,
>                                    type, a, b, code);
>    else
>      return expand_vector_piecewise (gsi, f,
>                                     type, TREE_TYPE (type),
>                                     a, b, code);
>  }
>
> -/* Check if vector VEC consists of all the equal elements and
> -   that the number of elements corresponds to the type of VEC.
> -   The function returns first element of the vector
> -   or NULL_TREE if the vector is not uniform.  */
> -static tree
> -uniform_vector_p (tree vec)
> -{
> -  tree first, t;
> -  unsigned i;
> -
> -  if (vec == NULL_TREE)
> -    return NULL_TREE;
> -
> -  if (TREE_CODE (vec) == VECTOR_CST)
> -    {
> -      first = VECTOR_CST_ELT (vec, 0);
> -      for (i = 1; i < VECTOR_CST_NELTS (vec); ++i)
> -       if (!operand_equal_p (first, VECTOR_CST_ELT (vec, i), 0))
> -         return NULL_TREE;
> -
> -      return first;
> -    }
> -
> -  else if (TREE_CODE (vec) == CONSTRUCTOR)
> -    {
> -      first = error_mark_node;
> -
> -      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (vec), i, t)
> -        {
> -          if (i == 0)
> -            {
> -              first = t;
> -              continue;
> -            }
> -         if (!operand_equal_p (first, t, 0))
> -           return NULL_TREE;
> -        }
> -      if (i != TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec)))
> -       return NULL_TREE;
> -
> -      return first;
> -    }
> -
> -  return NULL_TREE;
> -}
> -
>  /* Try to expand vector comparison expression OP0 CODE OP1 by
>     querying optab if the following expression:
>         VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
>     can be expanded.  */
>  static tree
>  expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
>                            tree op1, enum tree_code code)
>  {
>    tree t;
>    if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
> Index: gcc/tree.c
> ===================================================================
> --- gcc/tree.c  (revision 198803)
> +++ gcc/tree.c  (working copy)
> @@ -10126,20 +10126,68 @@ initializer_zerop (const_tree init)
>             return false;
>
>         return true;
>        }
>
>      default:
>        return false;
>      }
>  }
>
> +/* Check if vector VEC consists of all the equal elements and
> +   that the number of elements corresponds to the type of VEC.
> +   The function returns first element of the vector
> +   or NULL_TREE if the vector is not uniform.  */
> +tree
> +uniform_vector_p (const_tree vec)
> +{
> +  tree first, t;
> +  unsigned i;
> +
> +  if (vec == NULL_TREE)
> +    return NULL_TREE;
> +
> +  gcc_assert (VECTOR_TYPE_P (TREE_TYPE (vec)));
> +
> +  if (TREE_CODE (vec) == VECTOR_CST)
> +    {
> +      first = VECTOR_CST_ELT (vec, 0);
> +      for (i = 1; i < VECTOR_CST_NELTS (vec); ++i)
> +       if (!operand_equal_p (first, VECTOR_CST_ELT (vec, i), 0))
> +         return NULL_TREE;
> +
> +      return first;
> +    }
> +
> +  else if (TREE_CODE (vec) == CONSTRUCTOR)
> +    {
> +      first = error_mark_node;
> +
> +      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (vec), i, t)
> +        {
> +          if (i == 0)
> +            {
> +              first = t;
> +              continue;
> +            }
> +         if (!operand_equal_p (first, t, 0))
> +           return NULL_TREE;
> +        }
> +      if (i != TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec)))
> +       return NULL_TREE;
> +
> +      return first;
> +    }
> +
> +  return NULL_TREE;
> +}
> +
>  /* Build an empty statement at location LOC.  */
>
>  tree
>  build_empty_stmt (location_t loc)
>  {
>    tree t = build1 (NOP_EXPR, void_type_node, size_zero_node);
>    SET_EXPR_LOCATION (t, loc);
>    return t;
>  }
>
> Index: gcc/tree.h
> ===================================================================
> --- gcc/tree.h  (revision 198803)
> +++ gcc/tree.h  (working copy)
> @@ -5284,20 +5284,25 @@ extern int fields_length (const_tree);
>
>  /* Returns the first FIELD_DECL in a type.  */
>
>  extern tree first_field (const_tree);
>
>  /* Given an initializer INIT, return TRUE if INIT is zero or some
>     aggregate of zeros.  Otherwise return FALSE.  */
>
>  extern bool initializer_zerop (const_tree);
>
> +/* Given a vector VEC, return its first element if all elements are
> +   the same.  Otherwise return NULL_TREE.  */
> +
> +extern tree uniform_vector_p (const_tree);
> +
>  /* Given a CONSTRUCTOR CTOR, return the element values as a vector.  */
>
>  extern vec<tree, va_gc> *ctor_to_vec (tree);
>
>  extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *,
>                                       HOST_WIDE_INT *, bool *);
>
>  extern bool complete_ctor_at_level_p (const_tree, HOST_WIDE_INT,
> const_tree);
>
>  /* integer_zerop (tree x) is nonzero if X is an integer constant of value
> 0.  */
> Index: gcc/fold-const.c
> ===================================================================
> --- gcc/fold-const.c    (revision 198803)
> +++ gcc/fold-const.c    (working copy)
> @@ -12402,20 +12402,26 @@ fold_binary_loc (location_t loc,
>         return omit_one_operand_loc (loc, type, arg0, arg1);
>        /* ... fall through ...  */
>
>      case LSHIFT_EXPR:
>      shift:
>        if (integer_zerop (arg1))
>         return non_lvalue_loc (loc, fold_convert_loc (loc, type, arg0));
>        if (integer_zerop (arg0))
>         return omit_one_operand_loc (loc, type, arg0, arg1);
>
> +      /* Prefer vector1 << scalar to vector1 << vector2
> +        if vector2 is uniform.  */
> +      if (VECTOR_TYPE_P (TREE_TYPE (arg1))
> +         && (tem = uniform_vector_p (arg1)) != NULL_TREE)
> +       return fold_build2_loc (loc, code, type, op0, tem);
> +
>        /* Since negative shift count is not well-defined,
>          don't try to compute it in the compiler.  */
>        if (TREE_CODE (arg1) == INTEGER_CST && tree_int_cst_sgn (arg1) < 0)
>         return NULL_TREE;
>
>        prec = element_precision (type);
>
>        /* Turn (a OP c1) OP c2 into a OP (c1+c2).  */
>        if (TREE_CODE (op0) == code && host_integerp (arg1, false)
>           && TREE_INT_CST_LOW (arg1) < prec
>
Jakub Jelinek May 13, 2013, 4:36 p.m.
On Sun, May 12, 2013 at 02:04:52PM +0200, Marc Glisse wrote:
> this patch passes bootstrap+testsuite on x86_64-linux-gnu. When
> moving uniform_vector_p, I only added the gcc_assert. Note that the
> fold_binary patch helps for constant vectors, but not for {n,n,n,n},
> which will require some help in forwprop for instance. This
> transformation is already done by the vector lowering pass, but
> that's too late in my opinion.
> 
> 2013-05-13  Marc Glisse  <marc.glisse@inria.fr>
> 
> gcc/
> 	* tree-vect-generic.c (uniform_vector_p): Move ...
> 	* tree.c (uniform_vector_p): ... here.
> 	* tree.h (uniform_vector_p): Declare it.
> 	* fold-const.c (fold_binary_loc) <shift>: Turn the second argument
> 	into a scalar.
> 
> gcc/testsuite/
> 	* gcc.dg/vector-shift-2.c: New testcase.

The testcase is UNSUPPORTED everywhere, because ccp1 dump isn't produced at
-O0.  Did you mean to add -O2 (or -O or -O3 etc.) to dg-options?

> --- gcc/testsuite/gcc.dg/vector-shift-2.c	(revision 0)
> +++ gcc/testsuite/gcc.dg/vector-shift-2.c	(revision 0)
> @@ -0,0 +1,13 @@
> +/* { dg-do compile } */
> +/* { dg-options "-fdump-tree-ccp1" } */
> +
> +typedef unsigned vec __attribute__ ((vector_size (16)));
> +void
> +f (vec *a)
> +{
> +  vec s = { 5, 5, 5, 5 };
> +  *a = *a << s;
> +}
> +
> +/* { dg-final { scan-tree-dump "<< 5" "ccp1" } } */
> +/* { dg-final { cleanup-tree-dump "ccp1" } } */

	Jakub
Marc Glisse May 13, 2013, 4:46 p.m.
On Mon, 13 May 2013, Jakub Jelinek wrote:

> On Sun, May 12, 2013 at 02:04:52PM +0200, Marc Glisse wrote:
>> this patch passes bootstrap+testsuite on x86_64-linux-gnu. When
>> moving uniform_vector_p, I only added the gcc_assert. Note that the
>> fold_binary patch helps for constant vectors, but not for {n,n,n,n},
>> which will require some help in forwprop for instance. This
>> transformation is already done by the vector lowering pass, but
>> that's too late in my opinion.
>>
>> 2013-05-13  Marc Glisse  <marc.glisse@inria.fr>
>>
>> gcc/
>> 	* tree-vect-generic.c (uniform_vector_p): Move ...
>> 	* tree.c (uniform_vector_p): ... here.
>> 	* tree.h (uniform_vector_p): Declare it.
>> 	* fold-const.c (fold_binary_loc) <shift>: Turn the second argument
>> 	into a scalar.
>>
>> gcc/testsuite/
>> 	* gcc.dg/vector-shift-2.c: New testcase.
>
> The testcase is UNSUPPORTED everywhere, because ccp1 dump isn't produced at
> -O0.

Argh! contrib/compare_tests only showed me:

New tests that PASS:

gcc.dg/vector-shift-2.c (test for excess errors)

And I happily went ahead...

> Did you mean to add -O2 (or -O or -O3 etc.) to dg-options?

-O is enough. Could you commit that please? Or I can do it in a couple 
hours.

Thanks for the notice and sorry for the breakage,
Jakub Jelinek May 13, 2013, 4:51 p.m.
On Mon, May 13, 2013 at 06:46:00PM +0200, Marc Glisse wrote:
> -O is enough. Could you commit that please? Or I can do it in a
> couple hours.

Done.

	Jakub

Patch hide | download patch | download mbox

Index: gcc/testsuite/gcc.dg/vector-shift-2.c
===================================================================
--- gcc/testsuite/gcc.dg/vector-shift-2.c	(revision 0)
+++ gcc/testsuite/gcc.dg/vector-shift-2.c	(revision 0)
@@ -0,0 +1,13 @@ 
+/* { dg-do compile } */
+/* { dg-options "-fdump-tree-ccp1" } */
+
+typedef unsigned vec __attribute__ ((vector_size (16)));
+void
+f (vec *a)
+{
+  vec s = { 5, 5, 5, 5 };
+  *a = *a << s;
+}
+
+/* { dg-final { scan-tree-dump "<< 5" "ccp1" } } */
+/* { dg-final { cleanup-tree-dump "ccp1" } } */

Property changes on: gcc/testsuite/gcc.dg/vector-shift-2.c
___________________________________________________________________
Added: svn:keywords
   + Author Date Id Revision URL
Added: svn:eol-style
   + native

Index: gcc/tree-vect-generic.c
===================================================================
--- gcc/tree-vect-generic.c	(revision 198803)
+++ gcc/tree-vect-generic.c	(working copy)
@@ -319,66 +319,20 @@  expand_vector_addition (gimple_stmt_iter
       && parts_per_word >= 4
       && TYPE_VECTOR_SUBPARTS (type) >= 4)
     return expand_vector_parallel (gsi, f_parallel,
 				   type, a, b, code);
   else
     return expand_vector_piecewise (gsi, f,
 				    type, TREE_TYPE (type),
 				    a, b, code);
 }
 
-/* Check if vector VEC consists of all the equal elements and
-   that the number of elements corresponds to the type of VEC.
-   The function returns first element of the vector
-   or NULL_TREE if the vector is not uniform.  */
-static tree
-uniform_vector_p (tree vec)
-{
-  tree first, t;
-  unsigned i;
-
-  if (vec == NULL_TREE)
-    return NULL_TREE;
-
-  if (TREE_CODE (vec) == VECTOR_CST)
-    {
-      first = VECTOR_CST_ELT (vec, 0);
-      for (i = 1; i < VECTOR_CST_NELTS (vec); ++i)
-	if (!operand_equal_p (first, VECTOR_CST_ELT (vec, i), 0))
-	  return NULL_TREE;
-
-      return first;
-    }
-
-  else if (TREE_CODE (vec) == CONSTRUCTOR)
-    {
-      first = error_mark_node;
-
-      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (vec), i, t)
-        {
-          if (i == 0)
-            {
-              first = t;
-              continue;
-            }
-	  if (!operand_equal_p (first, t, 0))
-	    return NULL_TREE;
-        }
-      if (i != TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec)))
-	return NULL_TREE;
-
-      return first;
-    }
-
-  return NULL_TREE;
-}
-
 /* Try to expand vector comparison expression OP0 CODE OP1 by
    querying optab if the following expression:
 	VEC_COND_EXPR< OP0 CODE OP1, {-1,...}, {0,...}>
    can be expanded.  */
 static tree
 expand_vector_comparison (gimple_stmt_iterator *gsi, tree type, tree op0,
                           tree op1, enum tree_code code)
 {
   tree t;
   if (! expand_vec_cond_expr_p (type, TREE_TYPE (op0)))
Index: gcc/tree.c
===================================================================
--- gcc/tree.c	(revision 198803)
+++ gcc/tree.c	(working copy)
@@ -10126,20 +10126,68 @@  initializer_zerop (const_tree init)
 	    return false;
 
 	return true;
       }
 
     default:
       return false;
     }
 }
 
+/* Check if vector VEC consists of all the equal elements and
+   that the number of elements corresponds to the type of VEC.
+   The function returns first element of the vector
+   or NULL_TREE if the vector is not uniform.  */
+tree
+uniform_vector_p (const_tree vec)
+{
+  tree first, t;
+  unsigned i;
+
+  if (vec == NULL_TREE)
+    return NULL_TREE;
+
+  gcc_assert (VECTOR_TYPE_P (TREE_TYPE (vec)));
+
+  if (TREE_CODE (vec) == VECTOR_CST)
+    {
+      first = VECTOR_CST_ELT (vec, 0);
+      for (i = 1; i < VECTOR_CST_NELTS (vec); ++i)
+	if (!operand_equal_p (first, VECTOR_CST_ELT (vec, i), 0))
+	  return NULL_TREE;
+
+      return first;
+    }
+
+  else if (TREE_CODE (vec) == CONSTRUCTOR)
+    {
+      first = error_mark_node;
+
+      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (vec), i, t)
+        {
+          if (i == 0)
+            {
+              first = t;
+              continue;
+            }
+	  if (!operand_equal_p (first, t, 0))
+	    return NULL_TREE;
+        }
+      if (i != TYPE_VECTOR_SUBPARTS (TREE_TYPE (vec)))
+	return NULL_TREE;
+
+      return first;
+    }
+
+  return NULL_TREE;
+}
+
 /* Build an empty statement at location LOC.  */
 
 tree
 build_empty_stmt (location_t loc)
 {
   tree t = build1 (NOP_EXPR, void_type_node, size_zero_node);
   SET_EXPR_LOCATION (t, loc);
   return t;
 }
 
Index: gcc/tree.h
===================================================================
--- gcc/tree.h	(revision 198803)
+++ gcc/tree.h	(working copy)
@@ -5284,20 +5284,25 @@  extern int fields_length (const_tree);
 
 /* Returns the first FIELD_DECL in a type.  */
 
 extern tree first_field (const_tree);
 
 /* Given an initializer INIT, return TRUE if INIT is zero or some
    aggregate of zeros.  Otherwise return FALSE.  */
 
 extern bool initializer_zerop (const_tree);
 
+/* Given a vector VEC, return its first element if all elements are
+   the same.  Otherwise return NULL_TREE.  */
+
+extern tree uniform_vector_p (const_tree);
+
 /* Given a CONSTRUCTOR CTOR, return the element values as a vector.  */
 
 extern vec<tree, va_gc> *ctor_to_vec (tree);
 
 extern bool categorize_ctor_elements (const_tree, HOST_WIDE_INT *,
 				      HOST_WIDE_INT *, bool *);
 
 extern bool complete_ctor_at_level_p (const_tree, HOST_WIDE_INT, const_tree);
 
 /* integer_zerop (tree x) is nonzero if X is an integer constant of value 0.  */
Index: gcc/fold-const.c
===================================================================
--- gcc/fold-const.c	(revision 198803)
+++ gcc/fold-const.c	(working copy)
@@ -12402,20 +12402,26 @@  fold_binary_loc (location_t loc,
 	return omit_one_operand_loc (loc, type, arg0, arg1);
       /* ... fall through ...  */
 
     case LSHIFT_EXPR:
     shift:
       if (integer_zerop (arg1))
 	return non_lvalue_loc (loc, fold_convert_loc (loc, type, arg0));
       if (integer_zerop (arg0))
 	return omit_one_operand_loc (loc, type, arg0, arg1);
 
+      /* Prefer vector1 << scalar to vector1 << vector2
+	 if vector2 is uniform.  */
+      if (VECTOR_TYPE_P (TREE_TYPE (arg1))
+	  && (tem = uniform_vector_p (arg1)) != NULL_TREE)
+	return fold_build2_loc (loc, code, type, op0, tem);
+
       /* Since negative shift count is not well-defined,
 	 don't try to compute it in the compiler.  */
       if (TREE_CODE (arg1) == INTEGER_CST && tree_int_cst_sgn (arg1) < 0)
 	return NULL_TREE;
 
       prec = element_precision (type);
 
       /* Turn (a OP c1) OP c2 into a OP (c1+c2).  */
       if (TREE_CODE (op0) == code && host_integerp (arg1, false)
 	  && TREE_INT_CST_LOW (arg1) < prec