Patchwork Fold vector to scalar optimization

login
register
mail settings
Submitter Artem Shinkarov
Date June 18, 2010, 2:24 p.m.
Message ID <AANLkTimvUBnsTQMbEbSFtUZw8Vx2ZX3MhZpen4djtx-C@mail.gmail.com>
Download mbox | patch
Permalink /patch/56194/
State New
Headers show

Comments

Artem Shinkarov - June 18, 2010, 2:24 p.m.
This patch was inspired by the previous patch I was working on
http://gcc.gnu.org/ml/gcc-patches/2010-06/msg01511.html and it should
be committed to the trunk only after the vector shifting patch has been
committed. Otherwise the tests will not work.

The patch itself converts an operation "vector1 >> vector2" to
"vector1 >> scalar" if all the elements of vector2 are the same.
At the moment the patch does not catch chains of variable assignments
like:

b = x;
a = b;
c = a;
vec1 >> {x,a,b,c}

because the vector lowering is happening too early, though the
optimization could be moved down later.

ChangeLog:

2010-06-18  Artem Shinkarov <artyom.shinakroff@gmail.com>

      * passes.c (init_optimization_passes): Move veclower after SSA.
      * tree-vect-generic.c (expand_vector_operations_1): Handle conversion.

      testsuite/
      * gcc.dg/vec-scal-opt.c: New testcase.
      * gcc.dg/vec-scal-opt1.c: New testcase.
      * gcc.dg/vec-scal-opt2.c: New testcase.


Bootstrapped and tested on x86_64-unknown-linux.

OK?

Patch

Index: gcc/passes.c
===================================================================
--- gcc/passes.c	(revision 160943)
+++ gcc/passes.c	(working copy)
@@ -737,7 +737,6 @@  init_optimization_passes (void)
   NEXT_PASS (pass_refactor_eh);
   NEXT_PASS (pass_lower_eh);
   NEXT_PASS (pass_build_cfg);
-  NEXT_PASS (pass_lower_vector);
   NEXT_PASS (pass_warn_function_return);
   NEXT_PASS (pass_build_cgraph_edges);
   NEXT_PASS (pass_inline_parameters);
@@ -765,6 +764,7 @@  init_optimization_passes (void)
 
       NEXT_PASS (pass_referenced_vars);
       NEXT_PASS (pass_build_ssa);
+      NEXT_PASS (pass_lower_vector);
       NEXT_PASS (pass_early_warn_uninitialized);
       /* Note that it is not strictly necessary to schedule an early
 	 inline pass here.  However, some test cases (e.g.,
Index: gcc/tree-vect-generic.c
===================================================================
--- gcc/tree-vect-generic.c	(revision 160943)
+++ gcc/tree-vect-generic.c	(working copy)
@@ -436,8 +436,72 @@  expand_vector_operations_1 (gimple_stmt_
     {
       /* If the 2nd argument is vector, we need a vector/vector shift */
       if (VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs2))))
-	op = optab_for_tree_code (code, type, optab_vector);
-      else
+        {
+          /* Check whether we have vector <op> {x,x,x,x} where x
+             could be a scalar variable or a constant. Transform
+             vector <op> {x,x,x,x} ==> vector <op> scalar.  */
+          if (TREE_CODE (rhs2) == SSA_NAME
+              && TREE_CODE (TREE_TYPE (rhs2)) == VECTOR_TYPE)
+            {
+              gimple def_stmt;
+              def_stmt = SSA_NAME_DEF_STMT (rhs2);
+
+              if (gimple_assign_single_p (def_stmt))
+                {
+                  tree var = gimple_assign_rhs1 (def_stmt);
+
+                  /* Check for {cst,cst,cst,...} case.  */
+                  if (TREE_CODE (var) == VECTOR_CST)
+                    {
+                      tree first, t, els = TREE_VECTOR_CST_ELTS (var);
+                      bool eq = true;
+                      
+                      first = TREE_VALUE (els);
+                      els = TREE_CHAIN (els);
+
+                      for (t = els; t; t = TREE_CHAIN (t))
+                        if (!(eq = operand_equal_p (first, TREE_VALUE(t), 0)))
+                          break;
+
+                      if (eq) /* All the elements are the same.  */
+                        {
+                          gimple_assign_set_rhs2 (stmt, first);
+                          update_stmt (stmt);
+                          rhs2 = first;
+                        }
+                    }
+                  /* Check for {x,x,x,...} case.  */
+                  else if (TREE_CODE (var) == CONSTRUCTOR)
+                    {
+                      tree first, t;
+                      bool eq = true;
+                      unsigned HOST_WIDE_INT i;
+
+                      FOR_EACH_CONSTRUCTOR_VALUE (CONSTRUCTOR_ELTS (var), i, t)
+                        {
+                          if (i == 0)
+                            {
+                              first = t;
+                              continue;
+                            }
+                          if (!(eq = (first == t)))
+                            break;
+                        }
+                      
+                      if (eq) /* All the elements are the same.  */
+                        {
+                          gimple_assign_set_rhs2 (stmt, first);
+                          update_stmt (stmt);
+                          rhs2 = first;
+                        }
+                    }
+                }
+            }
+          else    
+            op = optab_for_tree_code (code, type, optab_vector);
+        }
+
+      if (!VECTOR_MODE_P (TYPE_MODE (TREE_TYPE (rhs2))))
 	{
 	  /* Try for a vector/scalar shift, and if we don't have one, see if we
 	     have a vector/vector shift */
Index: gcc/testsuite/gcc.dg/vec-scal-opt1.c
===================================================================
--- gcc/testsuite/gcc.dg/vec-scal-opt1.c	(revision 0)
+++ gcc/testsuite/gcc.dg/vec-scal-opt1.c	(revision 0)
@@ -0,0 +1,20 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-veclower" } */
+
+#define vidx(type, vec, idx) (*((type *) &(vec) + idx))
+#define vector(elcount, type)  \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+short k;
+
+int main (int argc, char *argv[]) {
+   vector(8, short) v0 = {argc,2,3,4,5,6,7};
+   vector(8, short) r1;
+
+   r1 = v0 >> (vector(8, short)){2,2,2,2,2,2,2};
+
+   return vidx(short, r1, 0);
+}
+
+/* { dg-final { scan-tree-dump-times ">> 2" 1 "veclower" } } */
+/* { dg-final { cleanup-tree-dump "veclower" } } */
Index: gcc/testsuite/gcc.dg/vec-scal-opt2.c
===================================================================
--- gcc/testsuite/gcc.dg/vec-scal-opt2.c	(revision 0)
+++ gcc/testsuite/gcc.dg/vec-scal-opt2.c	(revision 0)
@@ -0,0 +1,19 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-veclower" } */
+
+#define vidx(type, vec, idx) (*((type *) &(vec) + idx))
+#define vector(elcount, type)  \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+int main (int argc, char *argv[]) {
+   vector(8, short) v0 = {argc,2,3,4,5,6,7};
+   vector(8, short) v1 = {2,2,2,2,2,2,2};
+   vector(8, short) r1;
+
+   r1 = v0 >> v1;
+
+   return vidx(short, r1, 0);
+}
+
+/* { dg-final { scan-tree-dump-times ">> 2" 1 "veclower" } } */
+/* { dg-final { cleanup-tree-dump "veclower" } } */
Index: gcc/testsuite/gcc.dg/vec-scal-opt.c
===================================================================
--- gcc/testsuite/gcc.dg/vec-scal-opt.c	(revision 0)
+++ gcc/testsuite/gcc.dg/vec-scal-opt.c	(revision 0)
@@ -0,0 +1,22 @@ 
+/* { dg-do compile } */
+/* { dg-options "-O -fdump-tree-veclower" } */
+
+#define vidx(type, vec, idx) (*((type *) &(vec) + idx))
+#define vector(elcount, type)  \
+__attribute__((vector_size((elcount)*sizeof(type)))) type
+
+short k;
+
+int main (int argc, char *argv[]) {
+   k = argc;
+   vector(8, short) v0 = {argc,2,3,4,5,6,7};
+   vector(8, short) v2 = {k,k,k,k,k,k,k,k};
+   vector(8, short) r1;
+
+   r1 = v0 >> v2;
+
+   return vidx(short, r1, 0);
+}
+
+/* { dg-final { scan-tree-dump-times ">> k.\[0-9_\]*" 1 "veclower" } } */
+/* { dg-final { cleanup-tree-dump "veclower" } } */